Purecap nvi(1) crash
Closed this issue · 4 comments
I was casually editing a kernel source-code file on my Morello box, running with pure cap + heap temporal safety + c18n and encountered this crash:
Core was generated by `vi kern_proc.c'.
Program terminated with signal SIGPROT, CHERI protection violation.
Capability bounds fault.
#0 apply_with (sp=0x41410400 [rwRW,0x41410400-0x41411730], lno=2620,
p=0x4299af91 [rwRW,0x42993000-0x4299b060] "\t", len=<optimized out>,
db_func=<optimized out>) at /usr/src/contrib/nvi/common/log.c:740
740 bp[i] = (lp[i] >> offl) ^ (lp[i+1] << offr);
(gdb) bt
#0 apply_with (sp=0x41410400 [rwRW,0x41410400-0x41411730], lno=2620,
p=0x4299af91 [rwRW,0x42993000-0x4299b060] "\t", len=<optimized out>,
db_func=<optimized out>) at /usr/src/contrib/nvi/common/log.c:740
#1 log_backward (sp=0x41410400 [rwRW,0x41410400-0x41411730],
rp=<optimized out>) at /usr/src/contrib/nvi/common/log.c:435
#2 0x0000000000169620 in vi (spp=0x41fffda0 [rwRW,0x41fffda0-0x41fffdb0])
at /usr/src/contrib/nvi/vi/vi.c:226
#3 0x000000000013b498 in editor (gp=0x4140f000 [rwRW,0x4140f000-0x414102e0],
argc=<optimized out>, argv=<optimized out>)
at /usr/src/contrib/nvi/common/main.c:402
#4 0x0000000000130d0c in main (argc=2620,
argv=0xffffbff7f540 [rwRW,0xffffbff7f540-0xffffbff7f570])
at /usr/src/contrib/nvi/cl/cl_main.c:115
(gdb) info registers
x0 0x415058c8 1095784648
x1 0x4299b060 1117368416
x2 0x415058d0 1095784656
x3 0x0 0
x4 0x41fff3c0 1107293120
x5 0x414f8030 1095729200
x6 0x0 0
x7 0x0 0
x8 0x8 8
x9 0x38 56
x10 0x1 1
x11 0x2900000029 176093659177
x12 0x0 0
x13 0x1f 31
x14 0xcc 204
x15 0x1 1
x16 0x4055d820 1079367712
x17 0x403f0b8d 1077873549
x18 0x0 0
x19 0x41410400 1094779904
x20 0x0 0
x21 0xa3c 2620
x22 0x41fff720 1107293984
x23 0x1a4f70 1724272
x24 0x1172b1 1143473
x25 0x4299af91 1117368209
x26 0x41fff6d8 1107293912
x27 0x41481200 1095242240
x28 0x4299af89 1117368201
x29 0x41fff410 1107293200
x30 0x139e3d 1285693
sp 0xfffffff80000 281474976186368
pc 0x13a124 1286436
cpsr 0x24000200 [ EL=0 D BTYPE=0 C64 C ]
fpsr 0x0 [ ]
fpcr 0x0 [ Len=0 Stride=0 RMode=0 ]
c0 0xdc5d40005a00580000000000415058c8 0x415058c8 [rwRW,0x41505800-0x41505a00]
c1 0xdc5d400018379806000000004299b060 0x4299b060 [rwRW,0x42993000-0x4299b060]
c2 0xdc5d40005a00580000000000415058d0 0x415058d0 [rwRW,0x41505800-0x41505a00]
c3 0x0 0x0
c4 0xdc5f400073e0f3c00000000041fff3c0 0x41fff3c0 [rwRW,0x41fff3c0-0x41fff3e0]
c5 0xdc5d40000037c00600000000414f8030 0x414f8030 [rwRW,0x414f8000-0x41500060]
c6 0x0 0x0
c7 0x0 0x0
c8 0x8 0x8
c9 0x38 0x38
c10 0x1 0x1
c11 0x2900000029 0x2900000029
c12 0x0 0x0
c13 0x1f 0x1f
c14 0xcc 0xcc
c15 0x1 0x1
c16 0xb05d400019a633a7000000004055d820 0x4055d820 <mpool_get@got[plt]> [rxR,0x4033a000-0x4099a000]
c17 0xb05d400099a633a700000000403f0b8d 0x403f0b8d <mpool_get> [rxR,0x4033a000-0x4099a000] (sentry)
c18 0x0 0x0
c19 0xdc5d4000573004000000000041410400 0x41410400 [rwRW,0x41410400-0x41411730]
c20 0x0 0x0
c21 0xa3c 0xa3c
--Type <RET> for more, q to quit, c to continue without paging--
c22 0xdc5f40007800f7200000000041fff720 0x41fff720 [rwRW,0x41fff720-0x41fff800]
c23 0xdc5d40004f804f7000000000001a4f70 0x1a4f70 <apply_with[bp]> [rwRW,0x1a4f70-0x1a4f80]
c24 0xb05d40001287800200000000001172b1 0x1172b1 [rxR,0x100000-0x1a5000]
c25 0xdc5d400018379806000000004299af91 0x4299af91 [rwRW,0x42993000-0x4299b060]
c26 0xdc5f400076f0f6100000000041fff6d8 0x41fff6d8 [rwRW,0x41fff610-0x41fff6f0]
c27 0xdc5d4000530012000000000041481200 0x41481200 [rwRW,0x41481200-0x41481300]
c28 0xdc5d400018379806000000004299af89 0x4299af89 [rwRW,0x42993000-0x4299b060]
c29 0xdc5f40000006c0070000000041fff410 0x41fff410 [rwRW,0x41c00000-0x42000000]
c30 0xb05d4000928780020000000000139e3d 0x139e3d <log_backward+352> [rxR,0x100000-0x1a5000] (sentry)
csp 0xdc5d40003ffdbfff0000fffffff80000 0xfffffff80000 [rwRW,0xffffbff80000-0xfffffff80000]
pcc 0xb05d400012878002000000000013a124 0x13a124 <log_backward+1096> [rxR,0x100000-0x1a5000]
ddc 0xdc5f40000807c8040000000040a03e00 0x40a03e00 [rwRW,0x409e4000-0x40a04000]
ctpidr 0xdc5d4000414040400000000040204040 0x40204040 [rwRW,0x40204040-0x40204140]
rcsp 0xdc5f40000006c0070000000041fff340 0x41fff340 [rwRW,0x41c00000-0x42000000]
rddc 0x0 0x0
rctpidr 0xdc5f4000101f501e0000000040c6a050 0x40c6a050 [rwRW,0x40c6a030-0x40c72030]
cid 0x0 0x0
cctlr <unavailable>
(gdb) list
735 }
736 for (i = 0; i < cnt; ++i)
737 #if BYTE_ORDER == BIG_ENDIAN
738 bp[i] = (lp[i] << offl) ^ (lp[i+1] >> offr);
739 #else
740 bp[i] = (lp[i] >> offl) ^ (lp[i+1] << offr);
741 #endif
742 p = (u_char *)bp;
743 }
744 #endif
(gdb) disassemble
...
0x000000000013a108 <+1068>: mov x11, xzr
0x000000000013a10c <+1072>: lsl x12, x11, #3
0x000000000013a110 <+1076>: sub x10, x10, x11
0x000000000013a114 <+1080>: sub x12, x12, x15
0x000000000013a118 <+1084>: add c0, c0, x11, uxtx #3
0x000000000013a11c <+1088>: add x12, x12, #0x10
0x000000000013a120 <+1092>: add c1, c28, x12, uxtx
=> 0x000000000013a124 <+1096>: ldp x11, x12, [c1, #-8]
0x000000000013a128 <+1100>: subs x10, x10, #0x1
0x000000000013a12c <+1104>: add c1, c1, #0x8
0x000000000013a130 <+1108>: lsr x11, x11, x8
0x000000000013a134 <+1112>: lsl x12, x12, x9
0x000000000013a138 <+1116>: eor x11, x12, x11
0x000000000013a13c <+1120>: str x11, [c0], #8
...
(gdb) up
#1 log_backward (sp=0x41410400 [rwRW,0x41410400-0x41411730],
rp=<optimized out>) at /usr/src/contrib/nvi/common/log.c:435
435 if (apply_with(db_set, sp, lno,
(gdb) list
430 case LOG_LINE_RESET_F:
431 break;
432 case LOG_LINE_RESET_B:
433 didop = 1;
434 memmove(&lno, p + sizeof(u_char), sizeof(recno_t));
435 if (apply_with(db_set, sp, lno,
436 p + CHAR_T_OFFSET, data.size - CHAR_T_OFFSET))
437 goto err;
438 if (sp->rptlchange != lno) {
439 sp->rptlchange = lno;
Frustratingly, all local variables of interest in `apply_with()` appear to have been optimised out at the point of the crash -- they are allocated locally within the block, but it is still a little surprising that none can be reached. However, in `log_backward()`, we do find some useful state:
(gdb) inspect db_set
$4 = {int (SCR *, recno_t, CHAR_T *, size_t)} 0x13930c <db_set>
(gdb) inspect sp
$5 = (SCR *) 0x41410400 [rwRW,0x41410400-0x41411730]
(gdb) inspect lno
$6 = 2620
(gdb) inspect p + CHAR_T_OFFSET
No symbol "CHAR_T_OFFSET" in current context.
(gdb) inspect p
$7 = (u_char *) 0x4299af89 [rwRW,0x42993000-0x4299b060] "\a<\n"
(gdb) inspect data.size
$8 = 212
It seems that `c1` (possibly `lp`?) is at the end of its buffer, and the code is attempting a load pair (`ldp`) from 8 bytes short of that, which isn’t enough room for a pair of 64-bit registers, let alone 128-bit ones:
c1 0xdc5d400018379806000000004299b060 0x4299b060 [rwRW,0x42993000-0x4299b060]
The context here is:
735 }
736 for (i = 0; i < cnt; ++i)
737 #if BYTE_ORDER == BIG_ENDIAN
738 bp[i] = (lp[i] << offl) ^ (lp[i+1] >> offr);
739 #else
740 bp[i] = (lp[i] >> offl) ^ (lp[i+1] << offr);
741 #endif
`lp` is of type `nword *`, which is in fact `unsigned long *`, and it isn’t surprising that the compiler might (via SROA or similar) try to convert the two (statically) adjacent 8-byte reads into a wider read. But there appear to be two problems here:
- `ldp` in pure cap code will load two adjacent 16-byte values, not two adjacent 8-byte values.
- Even if it were an `ldp` for two adjacent 8-byte reads, they still wouldn’t fit in the remaining buffer space, as the pointer is to the upper bound location, and the offset is -8 and not -16.
I haven’t tried to plumb the code generation further, but it seems like at the very least `ldp` is the wrong thing here, and no matter how we look at it, the immediate offset is likely wrong. But a more detailed reading of the code generation is required to understand this better, which I don’t have time for currently.
I have the binary and core dump easily to hand [for now].
Is this https://github.com/lichray/nvi2/pull/122/files not having been backported?
Should have mentioned: This is running on the unmodified head of the `demo-2024-10` branch.
> Is this https://github.com/lichray/nvi2/pull/122/files not having been backported?
Looks like lichray hasn't pushed out a new release since just before the fix. I'll see about looping the fix through FreeBSD.
Fixed in dev so closing