Skip to content

Commit 700adbe

Browse files
author
Josef Bacik
committed
a bunch of random scripts from this week's work
1 parent 2932980 commit 700adbe

File tree

4 files changed

+607
-0
lines changed

4 files changed

+607
-0
lines changed

bio-and-req-sizes.py

+204
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
from bcc import BPF
2+
import glob
3+
import os
4+
import re
5+
import time
6+
import argparse
7+
from time import sleep
8+
import signal
9+
import ctypes as ct
10+
11+
# Set to a non-zero value to print the list of selected disks before tracing.
debug = 0
12+
13+
# eBPF C program handed to bcc.  The CONDITIONALS placeholder (three
# occurrences) is substituted with a "(major == X && minor == Y) || ..."
# device filter by the setup code before the program is compiled.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/device.h>
#include <linux/kdev_t.h>
#include <linux/uio.h>

/* Record pushed to user space for every traced event. */
typedef struct request_size_s {
    u64 size;   /* I/O size taken from the probed request/bio */
    u64 read;   /* 1 if bit 0 of the op flags is clear, 0 otherwise */
} request_size_t;

BPF_PERF_OUTPUT(bio_events);
BPF_PERF_OUTPUT(iter_events);
BPF_PERF_OUTPUT(req_events);
BPF_PERF_OUTPUT(split_events);

// This sucks, but we have no better solution
static dev_t get_devt(struct request *req)
{
    struct gendisk *disk = req->rq_disk;
    return disk->part0.__dev.devt;
}

/* Report the data length of a request on a device that passes the filter. */
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
    dev_t device = get_devt(req);
    int major = MAJOR(device);
    int minor = MINOR(device);

    if (!(CONDITIONALS))
        return 0;
    request_size_t data = {
        .size = req->__data_len,
        .read = !(req->cmd_flags & 1),
    };
    req_events.perf_submit(ctx, &data, sizeof(data));
    return 0;
}

/* Report the size of a bio split; nr_sectors is scaled by 512 (<< 9). */
int trace_bio_split(struct pt_regs *ctx, struct bio *bio, int nr_sectors)
{
    dev_t device = bio->bi_bdev->bd_disk->part0.__dev.devt;
    int major = MAJOR(device);
    int minor = MINOR(device);

    if (!(CONDITIONALS))
        return 0;
    request_size_t data = {
        /* widen before shifting so large sector counts cannot overflow int */
        .size = (u64)nr_sectors << 9,
        .read = !(bio->bi_opf & 1),
    };
    split_events.perf_submit(ctx, &data, sizeof(data));
    return 0;
}

/* Report the size of a bio handed to submit_bio. */
int trace_submit_bio(struct pt_regs *ctx, struct bio *bio)
{
    dev_t device = bio->bi_bdev->bd_disk->part0.__dev.devt;
    int major = MAJOR(device);
    int minor = MINOR(device);
    u64 count = bio->bi_iter.bi_size;

    if (!(CONDITIONALS))
        return 0;
    request_size_t data = {
        .size = count,
        .read = !(bio->bi_opf & 1),
    };
    bio_events.perf_submit(ctx, &data, sizeof(data));
    return 0;
}

/* Remembers the bio seen on entry so the return probe can inspect it. */
typedef struct bio_storage_s {
    struct bio *bio;
} bio_storage_t;

BPF_HASH(bios, u64, bio_storage_t);

/* Entry probe: stash the bio pointer keyed by the current pid/tgid value. */
int trace_bio_iov_iter_get_pages(struct pt_regs *ctx, struct bio *bio)
{
    u64 pid = bpf_get_current_pid_tgid();
    bio_storage_t data = {
        .bio = bio,
    };
    bios.update(&pid, &data);
    return 0;
}

/* Return probe: report the size the stashed bio ended up with. */
int trace_bio_iov_iter_get_pages_ret(struct pt_regs *ctx)
{
    u64 pid = bpf_get_current_pid_tgid();
    bio_storage_t *data;

    data = bios.lookup(&pid);
    if (!data)
        return 0;

    /*
     * bi_iter.bi_size and bi_opf are 32-bit fields.  Read them into 32-bit
     * locals so that neighbouring struct members do not leak into the
     * reported values.
     */
    u32 size = 0;
    u32 opf = 0;
    request_size_t req = {};
    bpf_probe_read(&size, sizeof(size), &data->bio->bi_iter.bi_size);
    bpf_probe_read(&opf, sizeof(opf), &data->bio->bi_opf);
    req.size = size;
    req.read = !(opf & 1);
    iter_events.perf_submit(ctx, &req, sizeof(req));
    bios.delete(&pid);
    return 0;
}

"""
123+
124+
# Command line: optionally restrict tracing to a single block device.
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--device",
                    help="Trace this device only")
args = parser.parse_args()

# Each entry is a dict whose 'name' is the device path relative to /dev.
disks = []
if args.device:
    disks.append({'name': os.path.basename(args.device)})
else:
    # Name prefixes of the block devices traced by default.  These are regular
    # expressions (re.match anchors at the start), so a literal prefix needs
    # ".*" rather than the glob-style "*"; they are compiled once up front.
    dev_patterns = [re.compile(p) for p in
                    (r'sd.*', r'nvme.*', r'nbd.*', r'md.*', r'fio.*', r'etherd.*')]
    for device in glob.glob("/sys/block/*"):
        name = os.path.basename(device)
        if not any(p.match(name) for p in dev_patterns):
            continue
        if name.startswith("etherd"):
            # The /sys/block entry uses '!' where the /dev path has '/'
            # (presumably sysfs name escaping -- confirm).
            name = name.replace('!', '/')
        disks.append({'name': name})
if debug:
    print(disks)
143+
144+
# Build the C device filter: one "(major == X && minor == Y)" clause per disk,
# OR-ed together.  Each disk dict also gains 'major' and 'minor' entries.
conditional_template = "(major == MAJOR && minor == MINOR)"
clauses = []
for disk in disks:
    stinfo = os.stat('/dev/{}'.format(disk['name']))
    disk['major'] = os.major(stinfo.st_rdev)
    disk['minor'] = os.minor(stinfo.st_rdev)
    clause = conditional_template.replace('MAJOR', "{}".format(disk['major']))
    clauses.append(clause.replace('MINOR', "{}".format(disk['minor'])))

# With no disks selected the filter degenerates to "1", i.e. trace everything.
conditionals = " || ".join(clauses)
if conditionals == "":
    conditionals = "1"
bpf_text = bpf_text.replace('CONDITIONALS', conditionals)
161+
162+
# Compile the BPF program and hook each handler to its kernel function,
# in the listed order.
b = BPF(text=bpf_text)
for attach, event, handler in (
    (b.attach_kprobe, "submit_bio", "trace_submit_bio"),
    (b.attach_kprobe, "bio_iov_iter_get_pages", "trace_bio_iov_iter_get_pages"),
    (b.attach_kretprobe, "bio_iov_iter_get_pages", "trace_bio_iov_iter_get_pages_ret"),
    (b.attach_kprobe, "blk_start_request", "trace_req_start"),
    (b.attach_kprobe, "blk_mq_start_request", "trace_req_start"),
    (b.attach_kprobe, "bio_split", "trace_bio_split"),
):
    attach(event=event, fn_name=handler)
170+
171+
class RequestSize(ct.Structure):
    """ctypes view of an event record: two consecutive unsigned 64-bit fields."""
    _fields_ = [
        ("size", ct.c_ulonglong),  # I/O size carried by the event
        ("read", ct.c_ulonglong),  # print_size() reports 1 as "read", else "write"
    ]
176+
177+
def print_size(prestr, event):
    """Print one event as "<prestr> <read|write>: <size>".

    prestr -- label identifying where the event came from.
    event  -- object with ``read`` and ``size`` attributes; ``read == 1`` is
              reported as "read", any other value as "write".
    """
    iostr = "read" if event.read == 1 else "write"
    print("{} {}: {}".format(prestr, iostr, event.size))
182+
183+
def print_bio_size(cpu, data, size):
    """Perf-buffer callback: decode *data* as a RequestSize and print it as "bio"."""
    print_size("bio", ct.cast(data, ct.POINTER(RequestSize)).contents)
186+
187+
def print_iter_size(cpu, data, size):
    """Perf-buffer callback: decode *data* as a RequestSize and print it as "iter"."""
    print_size("iter", ct.cast(data, ct.POINTER(RequestSize)).contents)
190+
191+
def print_req_size(cpu, data, size):
    """Perf-buffer callback: decode *data* as a RequestSize and print it as "req"."""
    print_size("req", ct.cast(data, ct.POINTER(RequestSize)).contents)
194+
195+
def print_split_size(cpu, data, size):
    """Perf-buffer callback: decode *data* as a RequestSize and print it as "split"."""
    print_size("split", ct.cast(data, ct.POINTER(RequestSize)).contents)
198+
199+
# Route each perf buffer to the callback that labels its events.
b["bio_events"].open_perf_buffer(print_bio_size)
b["iter_events"].open_perf_buffer(print_iter_size)
b["req_events"].open_perf_buffer(print_req_size)
b["split_events"].open_perf_buffer(print_split_size)

# Drain events until interrupted.  perf_buffer_poll() is the current name of
# the deprecated kprobe_poll(); Ctrl-C ends the loop instead of producing a
# traceback.
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        break

kswapd-work.py

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from bcc import BPF
2+
from time import sleep
3+
import signal
4+
5+
def signal_ignore(signal, frame):
    """Signal handler that swallows the signal and only prints an empty line."""
    print()
7+
8+
class SignalInterrupt(Exception):
    """Exception carrying a message, used to report an interrupting signal."""

    def __init__(self, message):
        super(SignalInterrupt, self).__init__(message)
11+
12+
def signal_stop(signal, frame):
    """Signal handler that aborts the current wait by raising SignalInterrupt."""
    raise SignalInterrupt("Interrupted!")
14+
15+
# eBPF C program handed to bcc: accumulates scan/reclaim totals and a
# shrink_node call count in the "counts" hash, keyed by the *_ID constants.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/vmpressure.h>

#define SCANNED_ID 1
#define RECLAIMED_ID 2
#define WAKEUPS 3

BPF_HASH(counts, int);

/* We use vmpressure because struct scan_control is internal to vmscan.c, so we
 * use vmpressure as an analog.
 */
int trace_vmpressure(struct pt_regs *ctx, gfp_t gfp, struct mem_cgroup *memcg,
                     bool tree, unsigned long scanned, unsigned long reclaimed)
{
    int id;
    u64 zero = 0, *val;

    id = SCANNED_ID;
    val = counts.lookup_or_init(&id, &zero);
    (*val) += scanned;
    id = RECLAIMED_ID;
    val = counts.lookup_or_init(&id, &zero);
    (*val) += reclaimed;
    return 0;
}

/* We can hit this via direct reclaim, but my test cases never hit direct
 * reclaim, so I'm taking the easy way out.
 */
int trace_shrink_node(struct pt_regs *ctx)
{
    int id = WAKEUPS;
    u64 zero = 0, *val;
    val = counts.lookup_or_init(&id, &zero);
    (*val)++;
    return 0;
}
"""
55+
56+
# Compile the program and attach one kprobe per traced kernel function.
b = BPF(text=bpf_text)
for event, handler in (
    ("vmpressure", "trace_vmpressure"),
    ("shrink_node", "trace_shrink_node"),
):
    b.attach_kprobe(event=event, fn_name=handler)
59+
60+
print("Tracing, hit Ctrl+C to exit")

# Sleep until SIGINT arrives; signal_stop turns it into SignalInterrupt.
signal.signal(signal.SIGINT, signal_stop)
try:
    sleep(99999999)
except (SignalInterrupt, KeyboardInterrupt):
    # From here on a further Ctrl+C is handled by signal_ignore instead of
    # interrupting the summary below.
    signal.signal(signal.SIGINT, signal_ignore)
68+
69+
# Pull the totals out of the BPF hash.  Keys 1, 2 and 3 are SCANNED_ID,
# RECLAIMED_ID and WAKEUPS from the C program; a missing key counts as 0.
counts = b.get_table("counts")
totals = {key.value: val.value for key, val in counts.items()}
scanned = totals.get(1, 0)
reclaimed = totals.get(2, 0)
wakeups = totals.get(3, 0)

print("Total wake ups: {}".format(wakeups))
print("Total scanned: {}".format(scanned))
print("Total reclaimed: {}".format(reclaimed))
# Averages are only meaningful (and computable) once at least one run was seen.
if wakeups > 0:
    print("Avg scanned per run: {}".format(float(scanned) / wakeups))
    print("Avg reclaimed per run: {}".format(float(reclaimed) / wakeups))

0 commit comments

Comments
 (0)