function call is inappropriately inlined
Closed this issue · 1 comments
andrewrk commented
Example extracted from ratfactor/ziglings#129 (thanks @ratfactor!)
const std = @import("std");
/// File-scope panic handler for this reproduction.
/// Both arguments (the panic message and the optional stack trace) are
/// discarded and the function body is just `unreachable`.
// NOTE(review): presumably defined so that the default panic machinery from
// std does not show up in the emitted LLVM IR - confirm.
pub fn panic(msg: []const u8, st: ?*std.builtin.StackTrace) noreturn {
_ = msg;
_ = st;
unreachable;
}
/// Exported entry point of the reproduction.
/// Runs `doTheTest` and returns `false` if it returned an error,
/// `true` otherwise.
export fn entry() bool {
doTheTest() catch return false;
return true;
}
/// Calls `makeLlamas(5)` twice and discards both results.
/// The return type is `!void`, but nothing in the body produces an error.
fn doTheTest() !void {
// Two separate calls with the same argument: these are the two call
// sites whose lowering the issue is about.
const llamas1 = makeLlamas(5);
const llamas2 = makeLlamas(5);
// Explicit discards so the unused locals do not cause a compile error.
_ = llamas1;
_ = llamas2;
}
/// Returns an array of `count` bytes in which element `i` holds the value `i`.
///
/// `count` is NOT declared `comptime`, yet it is used both in the return
/// type (`[count]u8`) and in the type of the local `temp`. This is the
/// construct the issue is reporting on.
fn makeLlamas(count: usize) [count]u8 {
// Left undefined here; every element is assigned by the loop below.
var temp: [count]u8 = undefined;
// `i` is a `u8` so it can be stored directly as the element value.
var i: u8 = 0;
while (i < count) : (i += 1) {
temp[i] = i;
}
return temp;
}
$ stage3/bin/zig build-obj test2.zig --verbose-llvm-ir --strip -OReleaseFast
LLVM Emit Object... ; ModuleID = 'test2'
source_filename = "test2"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@builtin.output_mode = internal unnamed_addr constant i2 -2, align 1
; Function Attrs: nounwind
define internal fastcc i16 @test2.doTheTest() unnamed_addr #0 {
Entry:
%0 = alloca i8, align 1
%1 = alloca [5 x i8], align 1
%2 = alloca i8, align 1
%3 = alloca [5 x i8], align 1
store i8 0, i8* %2, align 1
br label %Loop
Loop: ; preds = %Block, %Entry
%4 = load i8, i8* %2, align 1
%5 = icmp ult i8 %4, 5
br i1 %5, label %Then, label %Else
Then: ; preds = %Loop
%6 = load i8, i8* %2, align 1
%7 = zext i8 %6 to i64
%8 = getelementptr inbounds [5 x i8], [5 x i8]* %3, i32 0, i64 %7
%9 = load i8, i8* %2, align 1
store i8 %9, i8* %8, align 1
br label %Block
Else: ; preds = %Loop
br label %Block1
Block: ; preds = %Then
%10 = load i8, i8* %2, align 1
%11 = add nuw i8 %10, 1
store i8 %11, i8* %2, align 1
br label %Loop
Block1: ; preds = %Else
store i8 0, i8* %0, align 1
br label %Loop2
Loop2: ; preds = %Block5, %Block1
%12 = load i8, i8* %0, align 1
%13 = icmp ult i8 %12, 5
br i1 %13, label %Then3, label %Else4
Then3: ; preds = %Loop2
%14 = load i8, i8* %0, align 1
%15 = zext i8 %14 to i64
%16 = getelementptr inbounds [5 x i8], [5 x i8]* %1, i32 0, i64 %15
%17 = load i8, i8* %0, align 1
store i8 %17, i8* %16, align 1
br label %Block5
Else4: ; preds = %Loop2
br label %Block6
Block5: ; preds = %Then3
%18 = load i8, i8* %0, align 1
%19 = add nuw i8 %18, 1
store i8 %19, i8* %0, align 1
br label %Loop2
Block6: ; preds = %Else4
ret i16 0
}
; Function Attrs: nounwind
define dso_local i1 @entry() #0 {
Entry:
%0 = call fastcc i16 @test2.doTheTest()
ret i1 true
}
attributes #0 = { nounwind "frame-pointer"="none" "target-cpu"="skylake" "target-features"="-16bit-mode,-32bit-mode,-3dnow,-3dnowa,+64bit,+adx,+aes,-amx-bf16,-amx-int8,-amx-tile,+avx,+avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,+bmi,+bmi2,-branchfusion,-cldemote,+clflushopt,-clwb,-clzero,+cmov,+crc32,+cx16,+cx8,-enqcmd,+ermsb,+f16c,-false-deps-lzcnt-tzcnt,+false-deps-popcnt,-fast-11bytenop,+fast-15bytenop,-fast-7bytenop,-fast-bextr,+fast-gather,-fast-hops,-fast-lzcnt,-fast-movbe,+fast-scalar-fsqrt,-fast-scalar-shift-masks,+fast-shld-rotate,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,-fast-vector-shift-masks,+fma,-fma4,+fsgsbase,-fsrm,+fxsr,-gfni,-hreset,-idivl-to-divb,+idivq-to-divl,+invpcid,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,+lzcnt,+macrofusion,+mmx,+movbe,-movdir64b,-movdiri,-mwaitx,+nopl,-pad-short-functions,+pclmul,-pconfig,-pku,+popcnt,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefetchwt1,+prfchw,-ptwrite,-rdpid,+rdrnd,+rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,+sahf,-serialize,-seses,+sgx,-sha,-shstk,+slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-soft-float,+sse,+sse2,+sse3,+sse4.1,+sse4.2,-sse4a,-sse-unaligned-mem,+ssse3,-tagged-globals,-tbm,-tsxldtrk,-uintr,-use-aa,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-vaes,-vpclmulqdq,+vzeroupper,-waitpkg,-wbnoinvd,-widekl,+x87,-xop,+xsave,+xsavec,+xsaveopt,+xsaves" }
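Here you can see that both calls to `makeLlamas` have been inlined into `doTheTest`: the IR contains two copies of the loop (`Loop` and `Loop2`) and no separate `makeLlamas` function. Instead, I would expect either a single generic function instantiation that is called at runtime, or the same behavior as stage1, which is a compile error: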
$ stage3/bin/zig build-obj test2.zig --verbose-llvm-ir --strip -OReleaseFast -fstage1
./test2.zig:21:30: error: use of undeclared identifier 'count'
fn makeLlamas(count: usize) [count]u8 {
                             ^
Indeed, this is the subject of Exercise 074 of ziglings, which explores this compile error.
This comment seems related (`Sema.zig:6639`, in `fn instantiateGenericCall`):
// Similarly, if the call evaluated to a generic type we need to instead
// call it inline.
if (new_fn_info.is_generic or new_fn_info.cc == .Inline) {
return error.GenericPoison;
}