ziglang/zig

function call is inappropriately inlined

Closed this issue · 1 comment

Example extracted from ratfactor/ziglings#129 (thanks @ratfactor!)

const std = @import("std");

pub fn panic(msg: []const u8, st: ?*std.builtin.StackTrace) noreturn { // custom panic handler for this repro; ignores both arguments
    _ = msg; // explicitly discarded
    _ = st; // explicitly discarded
    unreachable; // nothing is printed or formatted (presumably to keep the emitted IR small)
}

export fn entry() bool { // exported symbol; reports whether doTheTest completed without an error
    doTheTest() catch return false; // any error from doTheTest becomes `false`
    return true;
}

fn doTheTest() !void { // calls makeLlamas twice with the same literal argument and discards both results
    const llamas1 = makeLlamas(5);
    const llamas2 = makeLlamas(5); // second, identical call
    _ = llamas1; // explicit discards of the otherwise unused locals
    _ = llamas2;
}

fn makeLlamas(count: usize) [count]u8 { // NOTE: return type refers to `count`, which is not marked `comptime`; this is the construct the report is about
    var temp: [count]u8 = undefined; // written by the loop below before being returned
    var i: u8 = 0; // u8 index, so it can be stored directly as an element value

    while (i < count) : (i += 1) {
        temp[i] = i; // element i holds the value i
    }

    return temp; // array is returned by value
}
$ stage3/bin/zig build-obj test2.zig --verbose-llvm-ir --strip -OReleaseFast
LLVM Emit Object... ; ModuleID = 'test2'
source_filename = "test2"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@builtin.output_mode = internal unnamed_addr constant i2 -2, align 1

; Function Attrs: nounwind
define internal fastcc i16 @test2.doTheTest() unnamed_addr #0 {
Entry:
  %0 = alloca i8, align 1
  %1 = alloca [5 x i8], align 1
  %2 = alloca i8, align 1
  %3 = alloca [5 x i8], align 1
  store i8 0, i8* %2, align 1
  br label %Loop

Loop:                                             ; preds = %Block, %Entry
  %4 = load i8, i8* %2, align 1
  %5 = icmp ult i8 %4, 5
  br i1 %5, label %Then, label %Else

Then:                                             ; preds = %Loop
  %6 = load i8, i8* %2, align 1
  %7 = zext i8 %6 to i64
  %8 = getelementptr inbounds [5 x i8], [5 x i8]* %3, i32 0, i64 %7
  %9 = load i8, i8* %2, align 1
  store i8 %9, i8* %8, align 1
  br label %Block

Else:                                             ; preds = %Loop
  br label %Block1

Block:                                            ; preds = %Then
  %10 = load i8, i8* %2, align 1
  %11 = add nuw i8 %10, 1
  store i8 %11, i8* %2, align 1
  br label %Loop

Block1:                                           ; preds = %Else
  store i8 0, i8* %0, align 1
  br label %Loop2

Loop2:                                            ; preds = %Block5, %Block1
  %12 = load i8, i8* %0, align 1
  %13 = icmp ult i8 %12, 5
  br i1 %13, label %Then3, label %Else4

Then3:                                            ; preds = %Loop2
  %14 = load i8, i8* %0, align 1
  %15 = zext i8 %14 to i64
  %16 = getelementptr inbounds [5 x i8], [5 x i8]* %1, i32 0, i64 %15
  %17 = load i8, i8* %0, align 1
  store i8 %17, i8* %16, align 1
  br label %Block5

Else4:                                            ; preds = %Loop2
  br label %Block6

Block5:                                           ; preds = %Then3
  %18 = load i8, i8* %0, align 1
  %19 = add nuw i8 %18, 1
  store i8 %19, i8* %0, align 1
  br label %Loop2

Block6:                                           ; preds = %Else4
  ret i16 0
}

; Function Attrs: nounwind
define dso_local i1 @entry() #0 {
Entry:
  %0 = call fastcc i16 @test2.doTheTest()
  ret i1 true
}

attributes #0 = { nounwind "frame-pointer"="none" "target-cpu"="skylake" "target-features"="-16bit-mode,-32bit-mode,-3dnow,-3dnowa,+64bit,+adx,+aes,-amx-bf16,-amx-int8,-amx-tile,+avx,+avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,+bmi,+bmi2,-branchfusion,-cldemote,+clflushopt,-clwb,-clzero,+cmov,+crc32,+cx16,+cx8,-enqcmd,+ermsb,+f16c,-false-deps-lzcnt-tzcnt,+false-deps-popcnt,-fast-11bytenop,+fast-15bytenop,-fast-7bytenop,-fast-bextr,+fast-gather,-fast-hops,-fast-lzcnt,-fast-movbe,+fast-scalar-fsqrt,-fast-scalar-shift-masks,+fast-shld-rotate,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,-fast-vector-shift-masks,+fma,-fma4,+fsgsbase,-fsrm,+fxsr,-gfni,-hreset,-idivl-to-divb,+idivq-to-divl,+invpcid,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,+lzcnt,+macrofusion,+mmx,+movbe,-movdir64b,-movdiri,-mwaitx,+nopl,-pad-short-functions,+pclmul,-pconfig,-pku,+popcnt,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefetchwt1,+prfchw,-ptwrite,-rdpid,+rdrnd,+rdseed,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,+sahf,-serialize,-seses,+sgx,-sha,-shstk,+slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-shld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-soft-float,+sse,+sse2,+sse3,+sse4.1,+sse4.2,-sse4a,-sse-unaligned-mem,+ssse3,-tagged-globals,-tbm,-tsxldtrk,-uintr,-use-aa,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-vaes,-vpclmulqdq,+vzeroupper,-waitpkg,-wbnoinvd,-widekl,+x87,-xop,+xsave,+xsavec,+xsaveopt,+xsaves" }

Here you can see that both calls to makeLlamas have been inlined into doTheTest. Instead, I would expect one generic function instantiation that is called at runtime, or the same behavior as stage1, which is a compile error:

$ stage3/bin/zig build-obj test2.zig --verbose-llvm-ir --strip -OReleaseFast -fstage1
./test2.zig:21:30: error: use of undeclared identifier 'count'
fn makeLlamas(count: usize) [count]u8 {
                             ^

Indeed, this is the subject of Exercise 074 of ziglings, which explores this compile error.

This comment seems related:

Sema.zig:6639 in fn instantiateGenericCall

        // Similarly, if the call evaluated to a generic type we need to instead
        // call it inline.
        if (new_fn_info.is_generic or new_fn_info.cc == .Inline) {
            return error.GenericPoison;
        }

Thanks @Vexu I love the Llama test! 🦙 😄