unaligned load in struct copy sequence
brooksdavis opened this issue · 2 comments
brooksdavis commented
This bit of assembly is produced:
# Excerpt of the generated code that reads from the global PS.
daddiu $1, $16, %got_ofst(PS) # form the address of PS from $16 plus its GOT offset
cfromptr $c1, $c0, $1
csetbounds $c1, $c1, 48 # instantiate a capability to PS
clc $c2, $zero, 0($c1) # load a capability (works)
csc $c2, $zero, 0($c21) # store that capability at offset 0 of $c21
cincoffset $c2, $c1, 12 # increment by 12 (???)
clc $c2, $zero, 0($c2) # attempt an unaligned load!
csc $c2, $zero, 0($c22) # store the loaded value at offset 0 of $c22
clw $1, $zero, 28($c1) # word load at PS+28; NOTE(review): presumably append_newline - confirm
A somewhat reduced case from usr.bin/sed/process.c (which generates many of these) is:
#define NULL (void *)0
/*
 * Buffer descriptor that xprocess() copies by whole-struct assignment.
 * It mixes two pointer members (space, back) with integer members.
 * NOTE(review): with the 128-bit capability layout used in this report the
 * struct is presumably 48 bytes with the pointers at offsets 0 and 32,
 * matching the `csetbounds ..., 48` and the 48-byte memcpy - confirm.
 */
typedef struct {
char *space; /* Current space pointer. */
unsigned long len; /* Current length. */
int deleted; /* If deleted. */
int append_newline; /* If originally terminated by \n. */
char *back; /* Backing memory. */
} xSPACE;
/* Command list node, reduced to just the link that xprocess() follows. */
struct s_command {
struct s_command *next; /* Next command, or NULL at the end of the list. */
};
extern unsigned long linenum;
static xSPACE HS, PS;
static inline int applies(struct s_command *);
int mf_fgets(xSPACE *);
/*
 * Reduced reproducer. While mf_fgets(&PS) returns nonzero, walk cp to the
 * last node of the command list and then exchange the contents of PS and HS
 * through the local tspace, except that PS keeps its own append_newline.
 */
void
xprocess(struct s_command *cp)
{
xSPACE tspace;
for (linenum = 0; mf_fgets(&PS);) {
while (cp != NULL) {
/* Not at the last command yet: advance and re-test. */
if (cp->next != NULL) {
cp = cp->next;
continue;
}
/*
 * Whole-struct assignments. Per the analysis in this report these become
 * the memcpy intrinsics whose lowering produces the bad load.
 */
tspace = PS;
PS = HS;
/* Restore the flag PS held before it was overwritten with HS. */
PS.append_newline = tspace.append_newline;
HS = tspace;
break;
}
} /* for all lines */
}
Further reducing the complexity of the loops still produces wrong code, but it is less obviously wrong, so I stopped here, where the error is unambiguous.
A working command line is:
clang -g -integrated-as --target=cheri-unknown-freebsd -msoft-float -cheri=128 -G0 -EB -mabi=purecap -O -c testcase.c -o - -S
The issue persists at `-O`, `-O1`, and `-O2`.
davidchisnall commented
This test case appears to have 3 memcpy intrinsics in the basic block that generates the bad instruction sequence:
call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* nonnull align 16 %tspace.sroa.0.0..sroa_idx10, i8 addrspace(200)* align 16 bitcast (%struct.xSPACE addrspace(200)* @PS to i8 addrspace(200)*), i64 28, i1 false)
tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 bitcast (%struct.xSPACE addrspace(200)* @PS to i8 addrspace(200)*), i8 addrspace(200)* align 16 bitcast (%struct.xSPACE addrspace(200)* @HS to i8 addrspace(200)*), i64 48, i1 false), !tbaa.struct !10
call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 bitcast (%struct.xSPACE addrspace(200)* @HS to i8 addrspace(200)*), i8 addrspace(200)* nonnull align 16 %tspace.sroa.0.0..sroa_idx10, i64 28, i1 false)
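This looks as if it's one of the 28-byte ones: copying 28 bytes as two overlapping 16-byte capability accesses puts the second access at offset 28 - 16 = 12, which matches the `cincoffset ..., 12` in the bad sequence. SelectionDAG sometimes decides to use overlapping stores, but I thought I'd added a special case for `iFATPTR*` types telling it that it wasn't allowed to.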
davidchisnall commented
Simple test case:
target datalayout = "E-m:e-pf200:128:128-i8:8:32-i16:16:32-i64:64-n32:64-S128-A200"
target triple = "cheri-unknown-freebsd"
%struct.name_t = type { i8 addrspace(200)* }
@x = common local_unnamed_addr addrspace(200) global %struct.name_t zeroinitializer, align 16
; Function Attrs: nounwind
; Minimal reproducer: a single 28-byte memcpy from the global @x to %str,
; with both source and destination marked align 16 in address space 200.
; NOTE(review): %struct.name_t holds one pointer and !tbaa.struct !3 describes
; a single 16-byte field at offset 0, so the 28-byte length is longer than that
; field - presumably kept from the original case to trigger the bad lowering;
; confirm.
define void @test(%struct.name_t addrspace(200)* %str) local_unnamed_addr #0 {
entry:
%0 = bitcast %struct.name_t addrspace(200)* %str to i8 addrspace(200)*
call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* align 16 %0, i8 addrspace(200)* align 16 bitcast (%struct.name_t addrspace(200)* @x to i8 addrspace(200)*), i64 28, i1 false), !tbaa.struct !3
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* nocapture writeonly, i8 addrspace(200)* nocapture readonly, i64, i1) #1
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cheri128" "target-features"="+cheri128,+chericap,-noabicalls" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!3 = !{i64 0, i64 16, !4}
!4 = !{!5, !5, i64 0}
!5 = !{!"any pointer", !6, i64 0}
!6 = !{!"omnipotent char", !7, i64 0}
!7 = !{!"Simple C/C++ TBAA"}