problem on linux 4.19.91
supersojo opened this issue · 7 comments
root@vm_compiler-s8:~/ply# ply -dS 'k:schedule { exit(0); }'
-- globals
int stdbuf{u32}
k:schedule
{}void
bwritevoid
ctxvoid __bpf *
stdbufint{u32}
:structstruct :anon_0x24058e0
0u64
:structstruct :anon_0x2405880
0int
-- locals
void __bpf *ctx
-- ir
0 movq r2, r1
;; >pre {}()
;; >pre bwrite()
;; >pre ctx()
;; >post ctx()
;; >pre stdbuf()
;; >post stdbuf()
;; >pre :struct()
1 stw [bp - 0x4], #0x0
;; >pre <0>
;; >post <0>
;; >pre :struct()
;; >pre <0>
;; >post <0>
;; >post :struct()
2 stw [bp - 0x8], #0x0
;; >post :struct()
3 stq [bp - 0x10], #0x0
;; >post bwrite()
4 movq r1, r2
5 ldmap r2, stdbuf
6 ldw r0, #0x0
7 movw r3, #-0x1
8 movq r4, bp
9 addq r4, #-0x10
10 movq r5, #0x10
11 call perf_event_output
;; >post {}()
12 exit
error: could not link map to queue
error: unable to create buffer 'stdbuf'
ERR:-22
I am using powerpc 4.19.90 and see a similar output:
# ply -dS 'k:schedule { exit(0); }'
info: creating kallsyms cache
-- globals
int stdbuf{u32}
k:schedule
{}void
bwritevoid
ctxvoid __bpf *
stdbufint{u32}
:structstruct :anon_0xb7e595d0
0u64
:structstruct :anon_0xb7e59590
0int
-- locals
void __bpf *ctx
-- ir
0 movq r2, r1
;; >pre {}()
;; >pre bwrite()
;; >pre ctx()
;; >post ctx()
;; >pre stdbuf()
;; >post stdbuf()
;; >pre :struct()
1 stw [bp - 0x4], #0x0
;; >pre <0>
;; >post <0>
;; >pre :struct()
;; >pre <0>
;; >post <0>
;; >post :struct()
2 stw [bp - 0x8], #0x0
;; >post :struct()
3 stq [bp - 0x10], #0x0
;; >post bwrite()
4 movq r1, r2
5 ldmap r2, stdbuf
6 ldw r0, #0x0
7 movw r3, #-0x1
8 movq r4, bp
9 addq r4, #-0x10
10 movw r5, #0x10
11 call perf_event_output
;; >post {}()
12 exit
error: could not create queue
error: unable to create buffer 'stdbuf'
ERR:-22
#
Most of the examples are working now. I just had to enable CONFIG_KPROBE_EVENTS. But I still can't get the syscall tracing to work. I must be missing some other kernel config option?
# ply -dS 'kretprobe:SyS_read { @["size"] = quantize(retval); }'
-- globals
quantize_(null)_t @{struct :anon_0xb7c0ef70}
kretprobe:SyS_read
{}void
@=void
[]quantize_(null)_t
@quantize_(null)_t{struct :anon_0xb7c0ef70}
:structstruct :anon_0xb7c0ef70
"size"char[8]
quantizequantize_(null)_t
.reg_t
u*struct pt_regs
regsstruct pt_regs __bpf *
ctxvoid __bpf *
"gpr3"char[8]
-- locals
long retval
struct pt_regs __bpf *regs
void __bpf *ctx
-- ir
0 movq r6, r1
;; >pre {}()
;; >pre @=()
;; >pre []()
;; >pre @()
;; >post @()
;; >pre :struct()
;; >pre "size"
;; >post "size"
1 stw [bp - 0x8], #0x73697a65
2 stw [bp - 0x4], #0x0
;; >post :struct()
;; >post []()
3 ldmap r1, @
4 ldw r0, #0x0
5 movq r2, bp
6 addq r2, #-0x8
7 call map_lookup_elem
8 jeq r0, #0x0, +6
9 movq r1, bp
10 addq r1, #-0x88
11 movw r2, #0x80
12 movw r3, r0
13 call probe_read
14 ja +16
L1:
15 stq [bp - 0x88], #0x0
16 stq [bp - 0x80], #0x0
17 stq [bp - 0x78], #0x0
18 stq [bp - 0x70], #0x0
19 stq [bp - 0x68], #0x0
20 stq [bp - 0x60], #0x0
21 stq [bp - 0x58], #0x0
22 stq [bp - 0x50], #0x0
23 stq [bp - 0x48], #0x0
24 stq [bp - 0x40], #0x0
25 stq [bp - 0x38], #0x0
26 stq [bp - 0x30], #0x0
27 stq [bp - 0x28], #0x0
28 stq [bp - 0x20], #0x0
29 stq [bp - 0x18], #0x0
30 stq [bp - 0x10], #0x0
L2:
;; >pre quantize()
;; >pre .()
;; >pre u*()
;; >pre regs()
;; >pre ctx()
;; >post ctx()
;; >post regs()
;; >post u*()
;; >pre "gpr3"
;; >post "gpr3"
;; >post .()
31 movq r3, r6
32 addq r3, #0xc
33 movw r2, #0x4
34 movq r1, bp
35 addq r1, #-0x8c
36 call probe_read
;; >post quantize()
37 movw r0, #0x0
38 ldw r1, [bp - 0x8c]
39 jle r1, #0xffff, +2
40 addw r0, #0x10
41 rshq r1, #0x10
42 jle r1, #0xff, +2
43 addw r0, #0x8
44 rshq r1, #0x8
45 jle r1, #0xf, +2
46 addw r0, #0x4
47 rshq r1, #0x4
48 jle r1, #0x3, +2
49 addw r0, #0x2
50 rshq r1, #0x2
51 jle r1, #0x1, +2
52 addw r0, #0x1
53 rshq r1, #0x1
54 lshw r0, #0x2
55 movq r1, bp
56 addq r1, #-0x88
57 addq r1, r0
58 movw r0, #0x1
59 stw r1, r0
;; >post @=()
60 ldmap r1, @
61 ldw r0, #0x0
62 movq r2, bp
63 addq r2, #-0x8
64 movq r3, bp
65 addq r3, #-0x88
66 movw r4, #0x0
67 call map_update_elem
;; >post {}()
68 exit
ERR:-22
I have all the examples working now. There was a problem with the __xprobe_create and delete functions. xprobe.c sets the write buffer to 4k bytes, but for some reason on my embedded system the writev call was returning with max 1K bytes. So, if the wildcard list of probes was too big it ended up chopping the probe create write request in the middle. I ended up just flushing the write after every individual probe create.
Sorry for the latency. Always nice to revisit an issue a couple of days later and find that the issue has been mostly resolved 😄
@dcrawford1: Were you able to spot which parameter is set to 1k vs. 4k on your system? Presumably your page size is still 4k? It would be nice if ply could adapt to the kernel's preferences.
Additionally, it seems like two things need attention here:
- The error message generated when the kernel does not have the proper support is inscrutable. I'll put an item on the todo list to add some sort of self-test.
- I should probably update the example since the whole `SyS_` prefix only works on `x86_64` as far as I know.
The page size is definitely 4k. I am not sure where the 1K limit is set. I am using buildroot on powerpc with musl. The `setvbuf(xp->ctrl, NULL, _IOFBF, 0x1000)` call in xprobe_attach is returning success. If the kprobe wildcard set is small like 3 or 4 probes and the write buffer size is <1K it works fine. But, if the wildcard set is too big then the writev in strace returns with 1024 bytes written and the kprobe create request gets corrupted. I don't think it is too bad to just flush the write after each probe create/delete request. Even if I use a sys_* wildcard on my slow powerpc embedded system it only takes a second or two to setup.
# grep PageSize /proc/1/smaps
KernelPageSize: 4 kB
MMUPageSize: 4 kB
By the way, I love this project. This is the only solution to get BPF tracing working on our powerpc 64MB embedded target.
Is there any way to access indexed values within the array returned by the mem() function? I want to only continue processing the aggregation if the 2nd element returned by the mem(arg1, 10) function is a specific value.
> I don't think it is too bad to just flush the write after each probe create/delete request.
Unfortunately this seems to be heavily architecture dependent. Some archs are terribly slow when these calls are not batched. I think there was a more efficient interface added to do this a while back though, so that might be the proper solution.
Thank you for those kind words. Yes, I also work on embedded systems, so that has been a target from the start.
As for indexing into arrays, I'm sad to say that I haven't gotten around to it yet. ply frustratingly knows that the returned data is an array, but there is no support in the grammar to index it 🤦‍♂️. It's high on my list as it is an issue I run into a lot as well, that and not having type casts.