Jump Table Index Introduction
Opened this issue · 3 comments
spectral/hartel/typecheck
segfaults due to a jump to an address that is not code. This program will fault even if compiled without GHC optimizations, and the failure happens regularly (only input 600 tested so far).
LBB247_10: ## in Loop: Header=BB247_5 Depth=1
pushq %rax
movq %rbx, %rax
LBB247_5: ## %cbJG
## =>This Inner Loop Header: Depth=1
movq 8(%rbp), %r14
movq 16(%rbp), %rcx
movl %eax, %edx
andl $7, %edx
addq $-2, %rdx
cmpq $3, %rdx
ja LBB247_7
## BB#6: ## %cbJG
## in Loop: Header=BB247_5 Depth=1
movslq (%rsi,%rdx,4), %rdx
addq %rsi, %rdx
jmpq *%rdx # <<<< sometimes a bad value
What is odd here is that we have an indirect jump that was not marked as a TAILCALL by LLVM, and the computation of the target address is quite unusual. Currently, I'm trying to figure out how we even get this code in the _rDQ_info$def function.
Here's the output of my debug session:
(lldb) run 600
There is a running process, kill it and restart?: [Y/n] Y
Process 803 exited with status = 9 (0x00000009)
Process 818 launched: '/Users/kavon/msr/ghc/nofib/spectral/hartel/typecheck/Main' (x86_64)
Process 818 stopped
* thread #1: tid = 0x5088de, 0x000000010000a1ca Main`rDQ_info$def + 186, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
frame #0: 0x000000010000a1ca Main`rDQ_info$def + 186
Main`rDQ_info$def:
-> 0x10000a1ca <+186>: jmpq *%rdx
0x10000a1cc <+188>: addq $0x88, %r12
0x10000a1d3 <+195>: cmpq 0x358(%r13), %r12
0x10000a1da <+202>: jbe 0x10000a34d ; <+573>
(lldb) register read rsi rdx
rsi = 0x000000010000b184 Main`rDQ_info$def + 4212
rdx = 0x000000010000a290 Main`rDQ_info$def + 384
(lldb) c
Process 818 resuming
Process 818 stopped
* thread #1: tid = 0x5088de, 0x000000010000a1ca Main`rDQ_info$def + 186, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
frame #0: 0x000000010000a1ca Main`rDQ_info$def + 186
Main`rDQ_info$def:
-> 0x10000a1ca <+186>: jmpq *%rdx
0x10000a1cc <+188>: addq $0x88, %r12
0x10000a1d3 <+195>: cmpq 0x358(%r13), %r12
0x10000a1da <+202>: jbe 0x10000a34d ; <+573>
(lldb) register read rsi rdx
rsi = 0x000000010000b184 Main`rDQ_info$def + 4212
rdx = 0x000000010000a230 Main`rDQ_info$def + 288
(lldb) c
Process 818 resuming
Process 818 stopped
* thread #1: tid = 0x5088de, 0x000000010000a1ca Main`rDQ_info$def + 186, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
frame #0: 0x000000010000a1ca Main`rDQ_info$def + 186
Main`rDQ_info$def:
-> 0x10000a1ca <+186>: jmpq *%rdx
0x10000a1cc <+188>: addq $0x88, %r12
0x10000a1d3 <+195>: cmpq 0x358(%r13), %r12
0x10000a1da <+202>: jbe 0x10000a34d ; <+573>
(lldb) register read rsi rdx
rsi = 0x0000004200005fff
rdx = 0x0000004200005fff
(lldb) stepi
Process 818 stopped
* thread #1: tid = 0x5088de, 0x0000004200005fff, queue = 'com.apple.main-thread', stop reason = instruction step into
frame #0: 0x0000004200005fff
-> 0x4200005fff: addb %al, (%rax)
0x4200006001: addb %al, (%rax)
0x4200006003: addb %al, (%rax)
0x4200006005: addb %al, (%rax)
(lldb)
Process 818 stopped
* thread #1: tid = 0x5088de, 0x0000004200005fff, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=2, address=0x4200005fff)
frame #0: 0x0000004200005fff
-> 0x4200005fff: addb %al, (%rax)
0x4200006001: addb %al, (%rax)
0x4200006003: addb %al, (%rax)
0x4200006005: addb %al, (%rax)
(lldb)
It seems the junk value arrives in rsi at the return point (more context below):
.p2align 4, 0x90
.quad _SbVJ_srt$def-L7133701809754910780_1
.quad 2
.quad 140733193388062
L7133701809754910780_1:
LBB247_10: ## in Loop: Header=BB247_5 Depth=1
pushq %rax
movq %rbx, %rax
LBB247_5: ## %cbJG
## =>This Inner Loop Header: Depth=1
movq 8(%rbp), %r14
movq 16(%rbp), %rcx
movl %eax, %edx
andl $7, %edx
addq $-2, %rdx
cmpq $3, %rdx
ja LBB247_7
## BB#6: ## %cbJG
## in Loop: Header=BB247_5 Depth=1
movslq (%rsi,%rdx,4), %rdx
addq %rsi, %rdx
jmpq *%rdx
(lldb) run 600
There is a running process, kill it and restart?: [Y/n] y
Process 836 exited with status = 9 (0x00000009)
Process 840 launched: '/Users/kavon/msr/ghc/nofib/spectral/hartel/typecheck/Main' (x86_64)
Process 840 stopped
* thread #1: tid = 0x50a4af, 0x000000010000a1ac Main`rDQ_info$def + 156, queue = 'com.apple.main-thread', stop reason = breakpoint 4.1
frame #0: 0x000000010000a1ac Main`rDQ_info$def + 156
Main`rDQ_info$def:
-> 0x10000a1ac <+156>: movq 0x8(%rbp), %r14
0x10000a1b0 <+160>: movq 0x10(%rbp), %rcx
0x10000a1b4 <+164>: movl %eax, %edx
0x10000a1b6 <+166>: andl $0x7, %edx
(lldb) register read rsi rdx
rsi = 0x000000010000b184 Main`rDQ_info$def + 4212
rdx = 0x0000004200000140
(lldb) c
Process 840 resuming
Process 840 stopped
* thread #1: tid = 0x50a4af, 0x000000010000a1ca Main`rDQ_info$def + 186, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
frame #0: 0x000000010000a1ca Main`rDQ_info$def + 186
Main`rDQ_info$def:
-> 0x10000a1ca <+186>: jmpq *%rdx
0x10000a1cc <+188>: addq $0x88, %r12
0x10000a1d3 <+195>: cmpq 0x358(%r13), %r12
0x10000a1da <+202>: jbe 0x10000a34d ; <+573>
(lldb) register read rsi rdx
rsi = 0x000000010000b184 Main`rDQ_info$def + 4212
rdx = 0x000000010000a290 Main`rDQ_info$def + 384
(lldb) c
Process 840 resuming
Process 840 stopped
* thread #1: tid = 0x50a4af, 0x000000010000a1ac Main`rDQ_info$def + 156, queue = 'com.apple.main-thread', stop reason = breakpoint 4.1
frame #0: 0x000000010000a1ac Main`rDQ_info$def + 156
Main`rDQ_info$def:
-> 0x10000a1ac <+156>: movq 0x8(%rbp), %r14
0x10000a1b0 <+160>: movq 0x10(%rbp), %rcx
0x10000a1b4 <+164>: movl %eax, %edx
0x10000a1b6 <+166>: andl $0x7, %edx
(lldb) register read rsi rdx
rsi = 0x0000004200005fff
rdx = 0x0000004200005000
(lldb) c
Process 840 resuming
Process 840 stopped
* thread #1: tid = 0x50a4af, 0x000000010000a1ca Main`rDQ_info$def + 186, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
frame #0: 0x000000010000a1ca Main`rDQ_info$def + 186
Main`rDQ_info$def:
-> 0x10000a1ca <+186>: jmpq *%rdx
0x10000a1cc <+188>: addq $0x88, %r12
0x10000a1d3 <+195>: cmpq 0x358(%r13), %r12
0x10000a1da <+202>: jbe 0x10000a34d ; <+573>
(lldb) register read rsi rdx
rsi = 0x0000004200005fff
rdx = 0x0000004200005fff
(lldb)
The problem is the introduction of a jump table for a large switch that sits inside a loop containing a cpscall. We see this pattern in the following common scenario:
- a large switch
- the GC strategy says to perform a heap test in each arm of the case; upon failure, we loop back to the switch.
For sufficiently large switches (5 or 6 cases seems to be enough), the jump table index (JTI) introduced by LLC (the value passed in as rsi in the previous discussion) becomes a new value that is live across the GC call, and it is not saved to the GHC stack. In fact, it's left in a register that gets clobbered by the GC.
Here is a snippet of what the jump table dispatch looks like. You'll notice that in the above discussion, the JTI is live in the loop.
leaq LJTI0_0(%rip), %rbx
cmpq $5, %rdi
jbe LBB0_2
jmp LBB0_17
LBB0_2: ## %bigSwitch
movslq (%rbx,%rdi,4), %rax
addq %rbx, %rax
jmpq *%rax
.p2align 2, 0x90
.data_region jt32
L0_0_set_3 = LBB0_3-LJTI0_0
L0_0_set_7 = LBB0_7-LJTI0_0
L0_0_set_9 = LBB0_9-LJTI0_0
L0_0_set_11 = LBB0_11-LJTI0_0
L0_0_set_13 = LBB0_13-LJTI0_0
L0_0_set_15 = LBB0_15-LJTI0_0
LJTI0_0:
.long L0_0_set_3
.long L0_0_set_7
.long L0_0_set_9
.long L0_0_set_11
.long L0_0_set_13
.long L0_0_set_15
.end_data_region
Here's the current example I have going. Unfortunately I couldn't force the JTI to be live in the loop, but it's enough to use to start working on a fix. Note also that the jump table is introduced during initial isel (IR -> SelectionDAG) and not a separate optimization pass.
The current workaround to turn off this optimization is to pass -min-jump-table-entries=1000, which raises the threshold so that the jump table is not introduced.
; Reduced example for the jump-table issue discussed above: a switch with six
; explicit cases plus a default, where every arm may call @doGC and then
; branch back to the switch.

; External callee; its i64 result is used as the next switch operand.
; NOTE(review): presumably stands in for the GC call (cpscall) — confirm.
declare i64 @doGC(i64)
; External i64 global that each switch arm loads and compares against its
; own case number.
@gc_flag = external global i64
; i64 @foo(i64 %tgt)
;   %tgt is the initial switch operand.
;   Returns the number (0..6) of the arm that finally took its "ret" path.
define i64 @foo (i64 %tgt) {
entry:
br label %bigSwitch
; Loop header. The switch operand is %tgt on first entry, otherwise the value
; returned by @doGC in whichever gcN block branched back here.
bigSwitch:
%phi_tgt = phi i64 [%tgt, %entry], [%tgt0, %gc0] , [%tgt1, %gc1] , [%tgt2, %gc2] , [%tgt3, %gc3] , [%tgt4, %gc4] , [%tgt5, %gc5] , [%tgt6, %gc6]
switch i64 %phi_tgt, label %case6 [i64 0, label %case0
i64 1, label %case1
i64 2, label %case2
i64 3, label %case3
i64 4, label %case4
i64 5, label %case5]
; Every arm below has the same shape: load @gc_flag, compare it with the arm's
; own number N; if equal go to gcN (call @doGC, loop back to %bigSwitch),
; otherwise go to retN and return N.
case0:
%v0 = load i64, i64* @gc_flag
%c0 = icmp eq i64 0, %v0
br i1 %c0, label %gc0, label %ret0
gc0:
; The call sits inside the loop; its result feeds the phi in %bigSwitch.
%tgt0 = call i64 @doGC(i64 %phi_tgt)
br label %bigSwitch
ret0:
ret i64 0
case1:
%v1 = load i64, i64* @gc_flag
%c1 = icmp eq i64 1, %v1
br i1 %c1, label %gc1, label %ret1
gc1:
%tgt1 = call i64 @doGC(i64 %phi_tgt)
br label %bigSwitch
ret1:
ret i64 1
case2:
%v2 = load i64, i64* @gc_flag
%c2 = icmp eq i64 2, %v2
br i1 %c2, label %gc2, label %ret2
gc2:
%tgt2 = call i64 @doGC(i64 %phi_tgt)
br label %bigSwitch
ret2:
ret i64 2
case3:
%v3 = load i64, i64* @gc_flag
%c3 = icmp eq i64 3, %v3
br i1 %c3, label %gc3, label %ret3
gc3:
%tgt3 = call i64 @doGC(i64 %phi_tgt)
br label %bigSwitch
ret3:
ret i64 3
case4:
%v4 = load i64, i64* @gc_flag
%c4 = icmp eq i64 4, %v4
br i1 %c4, label %gc4, label %ret4
gc4:
%tgt4 = call i64 @doGC(i64 %phi_tgt)
br label %bigSwitch
ret4:
ret i64 4
case5:
%v5 = load i64, i64* @gc_flag
%c5 = icmp eq i64 5, %v5
br i1 %c5, label %gc5, label %ret5
gc5:
%tgt5 = call i64 @doGC(i64 %phi_tgt)
br label %bigSwitch
ret5:
ret i64 5
; Default destination of the switch: reached for any operand outside 0..5.
case6:
%v6 = load i64, i64* @gc_flag
%c6 = icmp eq i64 6, %v6
br i1 %c6, label %gc6, label %ret6
gc6:
%tgt6 = call i64 @doGC(i64 %phi_tgt)
br label %bigSwitch
ret6:
ret i64 6
}