iovisor/ply

problem on linux 4.19.91

supersojo opened this issue · 7 comments

root@vm_compiler-s8:~/ply# ply -dS 'k:schedule { exit(0); }'

-- globals
int stdbuf{u32}
k:schedule
{}void
bwritevoid
ctxvoid __bpf *
stdbufint{u32}
:structstruct :anon_0x24058e0
0u64
:structstruct :anon_0x2405880
0int

-- locals
void __bpf *ctx
-- ir
0 movq r2, r1
;; >pre {}()
;; >pre bwrite()
;; >pre ctx()
;; >post ctx()
;; >pre stdbuf()
;; >post stdbuf()
;; >pre :struct()
1 stw [bp - 0x4], #0x0
;; >pre <0>
;; >post <0>
;; >pre :struct()
;; >pre <0>
;; >post <0>
;; >post :struct()
2 stw [bp - 0x8], #0x0
;; >post :struct()
3 stq [bp - 0x10], #0x0
;; >post bwrite()
4 movq r1, r2
5 ldmap r2, stdbuf
6 ldw r0, #0x0
7 movw r3, #-0x1
8 movq r4, bp
9 addq r4, #-0x10
10 movq r5, #0x10
11 call perf_event_output
;; >post {}()
12 exit
error: could not link map to queue
error: unable to create buffer 'stdbuf'
ERR:-22

I am using powerpc 4.19.90 and see a similar output:

# ply -dS 'k:schedule { exit(0); }'
info: creating kallsyms cache


-- globals
int stdbuf{u32}
k:schedule
{}void
    bwritevoid
        ctxvoid __bpf *
        stdbufint{u32}
        :structstruct :anon_0xb7e595d0
            0u64
            :structstruct :anon_0xb7e59590
                0int


-- locals
void __bpf *ctx
-- ir
  0     movq    r2, r1
;; >pre  {}()
;; >pre  bwrite()
;; >pre  ctx()
;; >post ctx()
;; >pre  stdbuf()
;; >post stdbuf()
;; >pre  :struct()
  1     stw     [bp - 0x4], #0x0
;; >pre  <0>
;; >post <0>
;; >pre  :struct()
;; >pre  <0>
;; >post <0>
;; >post :struct()
  2     stw     [bp - 0x8], #0x0
;; >post :struct()
  3     stq     [bp - 0x10], #0x0
;; >post bwrite()
  4     movq    r1, r2
  5     ldmap   r2, stdbuf
  6     ldw     r0, #0x0
  7     movw    r3, #-0x1
  8     movq    r4, bp
  9     addq    r4, #-0x10
 10     movw    r5, #0x10
 11     call    perf_event_output
;; >post {}()
 12     exit
error: could not create queue
error: unable to create buffer 'stdbuf'
ERR:-22
#

Most of the examples are working now. I just had to enable CONFIG_KPROBE_EVENTS. But I still can't get the syscall tracing to work. I must be missing some other kernel config option?

# ply -dS 'kretprobe:SyS_read { @["size"] = quantize(retval); }'


-- globals
quantize_(null)_t @{struct :anon_0xb7c0ef70}
kretprobe:SyS_read
{}void
    @=void
        []quantize_(null)_t
            @quantize_(null)_t{struct :anon_0xb7c0ef70}
            :structstruct :anon_0xb7c0ef70
                "size"char[8]
        quantizequantize_(null)_t
            .reg_t
                u*struct pt_regs
                    regsstruct pt_regs __bpf *
                        ctxvoid __bpf *
                "gpr3"char[8]


-- locals
long retval
struct pt_regs __bpf *regs
void __bpf *ctx
-- ir
  0     movq    r6, r1
;; >pre  {}()
;; >pre  @=()
;; >pre  []()
;; >pre  @()
;; >post @()
;; >pre  :struct()
;; >pre  "size"
;; >post "size"
  1     stw     [bp - 0x8], #0x73697a65
  2     stw     [bp - 0x4], #0x0
;; >post :struct()
;; >post []()
  3     ldmap   r1, @
  4     ldw     r0, #0x0
  5     movq    r2, bp
  6     addq    r2, #-0x8
  7     call    map_lookup_elem
  8     jeq     r0, #0x0, +6
  9     movq    r1, bp
 10     addq    r1, #-0x88
 11     movw    r2, #0x80
 12     movw    r3, r0
 13     call    probe_read
 14     ja      +16
L1:
 15     stq     [bp - 0x88], #0x0
 16     stq     [bp - 0x80], #0x0
 17     stq     [bp - 0x78], #0x0
 18     stq     [bp - 0x70], #0x0
 19     stq     [bp - 0x68], #0x0
 20     stq     [bp - 0x60], #0x0
 21     stq     [bp - 0x58], #0x0
 22     stq     [bp - 0x50], #0x0
 23     stq     [bp - 0x48], #0x0
 24     stq     [bp - 0x40], #0x0
 25     stq     [bp - 0x38], #0x0
 26     stq     [bp - 0x30], #0x0
 27     stq     [bp - 0x28], #0x0
 28     stq     [bp - 0x20], #0x0
 29     stq     [bp - 0x18], #0x0
 30     stq     [bp - 0x10], #0x0
L2:
;; >pre  quantize()
;; >pre  .()
;; >pre  u*()
;; >pre  regs()
;; >pre  ctx()
;; >post ctx()
;; >post regs()
;; >post u*()
;; >pre  "gpr3"
;; >post "gpr3"
;; >post .()
 31     movq    r3, r6
 32     addq    r3, #0xc
 33     movw    r2, #0x4
 34     movq    r1, bp
 35     addq    r1, #-0x8c
 36     call    probe_read
;; >post quantize()
 37     movw    r0, #0x0
 38     ldw     r1, [bp - 0x8c]
 39     jle     r1, #0xffff, +2
 40     addw    r0, #0x10
 41     rshq    r1, #0x10
 42     jle     r1, #0xff, +2
 43     addw    r0, #0x8
 44     rshq    r1, #0x8
 45     jle     r1, #0xf, +2
 46     addw    r0, #0x4
 47     rshq    r1, #0x4
 48     jle     r1, #0x3, +2
 49     addw    r0, #0x2
 50     rshq    r1, #0x2
 51     jle     r1, #0x1, +2
 52     addw    r0, #0x1
 53     rshq    r1, #0x1
 54     lshw    r0, #0x2
 55     movq    r1, bp
 56     addq    r1, #-0x88
 57     addq    r1, r0
 58     movw    r0, #0x1
 59     stw     r1, r0
;; >post @=()
 60     ldmap   r1, @
 61     ldw     r0, #0x0
 62     movq    r2, bp
 63     addq    r2, #-0x8
 64     movq    r3, bp
 65     addq    r3, #-0x88
 66     movw    r4, #0x0
 67     call    map_update_elem
;; >post {}()
 68     exit
ERR:-22

I have all the examples working now. There was a problem with the __xprobe_create and delete functions. xprobe.c sets the write buffer to 4k bytes, but for some reason on my embedded system the writev call was returning with max 1K bytes. So, if the wildcard list of probes was too big it ended up chopping the probe create write request in the middle. I ended up just flushing the write after every individual probe create.

wkz commented

Sorry for the latency. Always nice to revisit an issue a couple of days later and find that the issue has been mostly resolved 😄

@dcrawford1: Were you able to spot which parameter is set to 1k vs. 4k on your system? Presumably your page size is still 4k? It would be nice if ply could adapt to the kernel's preferences.

Additionally, it seems like two things need attention here:

  • The error message generated when the kernel does not have the proper support is inscrutable. I'll put an item on the todo list to add some sort of self-test.
  • I should probably update the example since the whole SyS_ prefix only works on x86_64 as far as I know.

The page size is definitely 4k. I am not sure where the 1K limit is set. I am using buildroot on powerpc with musl. The setvbuf(xp->ctrl, NULL, _IOFBF, 0x1000) call in xprobe_attach is returning success. If the kprobe wildcard set is small like 3 or 4 probes and the write buffer size is <1K it works fine. But if the wildcard set is too big then the writev in strace returns with 1024 bytes written and the kprobe create request gets corrupted. I don't think it is too bad to just flush the write after each probe create/delete request. Even if I use a sys_* wildcard on my slow powerpc embedded system it only takes a second or two to set up.

# grep PageSize /proc/1/smaps
KernelPageSize:        4 kB
MMUPageSize:           4 kB

By the way, I love this project. This is the only solution to get BPF tracing working on our powerpc 64MB embedded target.

Is there any way to access indexed values within the array returned by the mem() function? I want to only continue processing the aggregation if the 2nd element returned by the mem(arg1, 10) function is a specific value.

wkz commented

> I don't think it is too bad to just flush the write after each probe create/delete request.

Unfortunately this seems to be heavily architecture dependent. Some archs are terribly slow when these calls are not batched. I think there was a more efficient interface added to do this a while back though, so that might be the proper solution.

Thank you for those kind words. Yes, I also work on embedded systems, so that has been a target from the start.

As for indexing into arrays, I'm sad to say that I haven't gotten around to it yet. ply frustratingly knows that the returned data is an array, but there is no support in the grammar to index it 🤦‍♂️. It's high on my list as it is an issue I run into a lot as well — that and not having type casts.