<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/56403">56403</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            LLVM 14 regression: __muloti4 is lowered with a recursive call despite nobuiltin attribute
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            bug,
            regression,
            miscompilation
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          andrewrk
      </td>
    </tr>
</table>

<pre>
    This is a regression from 13.0.1 to 14.0.6. The reproduction I have here is for aarch64-linux; however, I can also reproduce it for wasm32-wasi.

To help provide an overview of the IR, here is the high-level source code:

```zig
const std = @import("std");
const builtin = @import("builtin");

export fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    return muloXi4_genericSmall(i128, a, b, overflow);
}

inline fn muloXi4_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    overflow.* = 0;
    const min = std.math.minInt(ST);
    var res: ST = a *% b;
    if ((a < 0 and b == min) or (a != 0 and @divTrunc(res, a) != b))
        overflow.* = 1;
    return res;
}
```

Input LLVM IR:

```llvm
; ModuleID = 'compiler_rt'
source_filename = "compiler_rt"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-unknown"

@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@builtin.output_mode = internal unnamed_addr constant i2 -2, align 1

; Function Attrs: nobuiltin nounwind
define dso_local i128 @__muloti4(i128 %0, i128 %1, i32* nonnull align 4 %2) #0 {
Entry:
  %3 = alloca i128, align 16
  %4 = alloca i128, align 16
  store i32 0, i32* %2, align 4
  %5 = mul i128 %0, %1
  store i128 %5, i128* %3, align 16
  %6 = icmp slt i128 %0, 0
  br i1 %6, label %Then, label %Else

Then:                                             ; preds = %Entry
  %7 = icmp eq i128 %1, -170141183460469231731687303715884105728
  br label %Block

Else:                                             ; preds = %Entry
  br label %Block

Block:                                            ; preds = %Else, %Then
  %8 = phi i1 [ %7, %Then ], [ false, %Else ]
  br i1 %8, label %Then1, label %Else2

Then1:                                            ; preds = %Block
  br label %Block6

Else2:                                            ; preds = %Block
  %9 = icmp ne i128 %0, 0
  br i1 %9, label %Then3, label %Else4

Then3:                                            ; preds = %Else2
  %10 = load i128, i128* %3, align 16
  %11 = sdiv i128 %10, %0
  %12 = icmp ne i128 %11, %1
  br label %Block5

Else4:                                            ; preds = %Else2
  br label %Block5

Block5:                                           ; preds = %Else4, %Then3
  %13 = phi i1 [ %12, %Then3 ], [ false, %Else4 ]
  br label %Block6

Block6:                                           ; preds = %Block5, %Then1
  %14 = phi i1 [ true, %Then1 ], [ %13, %Block5 ]
  br i1 %14, label %Then7, label %Else8

Then7:                                            ; preds = %Block6
  store i32 1, i32* %2, align 4
  br label %Block9

Else8:                                            ; preds = %Block6
  br label %Block9

Block9:                                           ; preds = %Else8, %Then7
  %15 = load i128, i128* %3, align 16
  store i128 %15, i128* %4, align 16
  %16 = load i128, i128* %4, align 16
  ret i128 %16
}

attributes #0 = { nobuiltin nounwind "frame-pointer"="none" "target-cpu"="generic" "target-features"="-a510,-a65,-a710,-a76,-a78,-a78c,-aes,-aggressive-fma,-alternate-sextload-cvt-f32-pattern,-altnzcv,-am,-amvs,-arith-bcc-fusion,-arith-cbz-fusion,-balance-fp-ops,-bf16,-brbe,-bti,-call-saved-x10,-call-saved-x11,-call-saved-x12,-call-saved-x13,-call-saved-x14,-call-saved-x15,-call-saved-x18,-call-saved-x8,-call-saved-x9,-ccdp,-ccidx,-ccpp,-cmp-bcc-fusion,-complxnum,-CONTEXTIDREL2,-cortex-r82,-crc,-crypto,-custom-cheap-as-move,-disable-latency-sched-heuristic,-dit,-dotprod,-ecv,-el2vmsa,-el3,+ete,-exynos-cheap-as-move,-f32mm,-f64mm,-fgt,-fix-cortex-a53-835769,-flagm,-force-32bit-jump-tables,-fp16fml,+fp-armv8,-fptoint,-fullfp16,-fuse-address,+fuse-aes,-fuse-arith-logic,-fuse-crypto-eor,-fuse-csel,-fuse-literals,-harden-sls-blr,-harden-sls-nocomdat,-harden-sls-retbr,-hbc,-hcx,-i8mm,-jsconv,-lor,-ls64,-lse,-lse2,-lsl-fast,-mops,-mpam,-mte,+neon,-no-bti-at-return-twice,-no-neg-immediates,-no-zcz-fp,-nv,-outline-atomics,-pan,-pan-rwv,-pauth,+perfmon,-predictable-select-expensive,-predres,-rand,-ras,-rcpc,-rcpc-immo,-rdm,-reserve-x1,-reserve-x10,-reserve-x11,-reserve-x12,-reserve-x13,-reserve-x14,-reserve-x15,-reserve-x18,-reserve-x2,-reserve-x20,-reserve-x21,-reserve-x22,-reserve-x23,-reserve-x24,-reserve-x25,-reserve-x26,-reserve-x27,-reserve-x28,-reserve-x3,-reserve-x30,-reserve-x4,-reserve-x5,-reserve-x6,-reserve-x7,-reserve-x9,-rme,-sb,-sel2,-sha2,-sha3,-slow-misaligned-128store,-slow-paired-128,-slow-strqro-store,-sm4,-sme,-sme-f64,-sme-i64,-spe,-spe-eef,-specrestrict,-ssbs,-streaming-sve,-strict-align,-sve,-sve2,-sve2-aes,-sve2-bitperm,-sve2-sha3,-sve2-sm4,-tagged-globals,-tlb-rmi,-tme,-tpidr-el1,-tpidr-el2,-tpidr-el3,-tracev8.4,+trbe,-uaops,-use-experimental-zeroing-pseudos,+use-postra-scheduler,-use-reciprocal-square-root,-use-scalar-inc-vl,-v8.1a,-v8.2a,-v8.3a,-v8.4a,-v8.5a,-v8.6a,-v8.7a,-v8.8a,-v8a,-v8r,-v9.1a,-v9.2a,-v9.3a,-v9a,-vh,-wfxt,-xs,-zcm,-zcz,-zcz-fp-workaround,-zcz-gp" }
```

Note that the `__muloti4` function has the `nobuiltin` attribute. After optimizations are applied, the IR transforms to:

```llvm
; ModuleID = 'compiler_rt'
source_filename = "compiler_rt"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-unknown"

; Function Attrs: mustprogress nobuiltin nofree nosync nounwind willreturn writeonly
define dso_local i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align 4 %2) local_unnamed_addr #0 {
Entry:
  store i32 0, i32* %2, align 4
  %.fr = freeze i128 %1
  %mul = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %0, i128 %.fr)
  %3 = icmp slt i128 %0, 0
  %4 = icmp eq i128 %.fr, -170141183460469231731687303715884105728
  %5 = and i1 %3, %4
  br i1 %5, label %Then7, label %Else2

Else2:                                            ; preds = %Entry
  %mul.ov = extractvalue { i128, i1 } %mul, 1
  br i1 %mul.ov, label %Then7, label %Block9

Then7:                                            ; preds = %Else2, %Entry
  store i32 1, i32* %2, align 4
  br label %Block9

Block9:                                           ; preds = %Else2, %Then7
  %mul.val = extractvalue { i128, i1 } %mul, 0
  ret i128 %mul.val
}

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128) #1

attributes #0 = { mustprogress nobuiltin nofree nosync nounwind willreturn writeonly "frame-pointer"="none" "target-cpu"="generic" "target-features"="-a510,-a65,-a710,-a76,-a78,-a78c,-aes,-aggressive-fma,-alternate-sextload-cvt-f32-pattern,-altnzcv,-am,-amvs,-arith-bcc-fusion,-arith-cbz-fusion,-balance-fp-ops,-bf16,-brbe,-bti,-call-saved-x10,-call-saved-x11,-call-saved-x12,-call-saved-x13,-call-saved-x14,-call-saved-x15,-call-saved-x18,-call-saved-x8,-call-saved-x9,-ccdp,-ccidx,-ccpp,-cmp-bcc-fusion,-complxnum,-CONTEXTIDREL2,-cortex-r82,-crc,-crypto,-custom-cheap-as-move,-disable-latency-sched-heuristic,-dit,-dotprod,-ecv,-el2vmsa,-el3,+ete,-exynos-cheap-as-move,-f32mm,-f64mm,-fgt,-fix-cortex-a53-835769,-flagm,-force-32bit-jump-tables,-fp16fml,+fp-armv8,-fptoint,-fullfp16,-fuse-address,+fuse-aes,-fuse-arith-logic,-fuse-crypto-eor,-fuse-csel,-fuse-literals,-harden-sls-blr,-harden-sls-nocomdat,-harden-sls-retbr,-hbc,-hcx,-i8mm,-jsconv,-lor,-ls64,-lse,-lse2,-lsl-fast,-mops,-mpam,-mte,+neon,-no-bti-at-return-twice,-no-neg-immediates,-no-zcz-fp,-nv,-outline-atomics,-pan,-pan-rwv,-pauth,+perfmon,-predictable-select-expensive,-predres,-rand,-ras,-rcpc,-rcpc-immo,-rdm,-reserve-x1,-reserve-x10,-reserve-x11,-reserve-x12,-reserve-x13,-reserve-x14,-reserve-x15,-reserve-x18,-reserve-x2,-reserve-x20,-reserve-x21,-reserve-x22,-reserve-x23,-reserve-x24,-reserve-x25,-reserve-x26,-reserve-x27,-reserve-x28,-reserve-x3,-reserve-x30,-reserve-x4,-reserve-x5,-reserve-x6,-reserve-x7,-reserve-x9,-rme,-sb,-sel2,-sha2,-sha3,-slow-misaligned-128store,-slow-paired-128,-slow-strqro-store,-sm4,-sme,-sme-f64,-sme-i64,-spe,-spe-eef,-specrestrict,-ssbs,-streaming-sve,-strict-align,-sve,-sve2,-sve2-aes,-sve2-bitperm,-sve2-sha3,-sve2-sm4,-tagged-globals,-tlb-rmi,-tme,-tpidr-el1,-tpidr-el2,-tpidr-el3,-tracev8.4,+trbe,-uaops,-use-experimental-zeroing-pseudos,+use-postra-scheduler,-use-reciprocal-square-root,-use-scalar-inc-vl,-v8.1a,-v8.2a,-v8.3a,-v8.4a,-v8.5a,-v8.6a,-v8.7a,-v8.8a,-v8a,-v8r,-v9.1a,-v9.2a,-v9.3a,-v9a,-vh,-wfxt,-xs,-zcm,-zcz,-zcz-fp-workaround,-zcz-gp" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
```

Note in particular the call to `@llvm.smul.with.overflow.i128`. This will lower to a call to `__muloti4` itself, causing infinite recursion. In LLVM 13.0.1, this was instead optimized to a `mul` instruction, avoiding the problem.

If this is all working as designed, could someone please advise how this use case is intended to be handled? As far as I can tell, LLVM's own compiler-rt is just getting "lucky" that this is not happening to its `__muloti4` symbol, since its implementation is slightly more convoluted.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJztGttu47jua9IXwYEtO7eHPkznAhTY3QOcKQ72rVBsOdGMbHklOWn79YekbMdO2s7MTh876EQSTfEiURRJe2uKx-u7vXIM_gSzcmelc8rUrLSmYkk6j-cJ84YlGfSWc3a3l4DVWFO0uUe8W7YXB8n20kqkURrLhLD5fplFWtXtw4x_ZHtzlAdpsXvLclEzoZ0ZyMA8T_OOwlUpj6BR81n8aRZ_CL93BsjrhgH6QRWSAQED5A5KHpkpmQeRbv9LfDohELJXuz3TwFYzZ1oLXHJTyFn6YUx5tozD35PaBUhuaueZ8wWbpZ_YLItV1RjrZ3w94xzA8Dvjm1l6M0bftkp7VT83pXt0Ni38ygfEY2XN7u-rVhuvMpgkQEamEpz9kW3HA1S61OaIsBn_kN-rGrhsYEW1BkEOMHn-EQE4gc1WHTMG_6z0ra0ZcvlbZfc7WUur8q8VzIRZPQMRWI5ZjYVefRpLr2rYX4nSv0A1N1XjVSXZ1zuU2D82MjCBAYB67UL3Jd2-3k0V6fHmgETrHQ_y4eOwH1W3F7Bf80r4_RwAt0hvjcw2kxkHYWF1XJCEZgkUYMYXKN4IUZWMtnQNz9OPLAYzLNgWZ-CkCvd4w8CKAwZPSDhCApMo1OHOtnUOD5FZWOtNj7ZFoeBv4PWspslEnG5HSfSLDerNerxft3XTevbHH__7E0_LC-dA60PVgdIb9iccTy1vPwXD5ivcUaWlvUfrXgW8cLjuS4DXAjY7oPIJKg-oXtid9KwQXmjxaECaDllGFQgkI7WGBv-DF1DJEjr0g6NlBh3wKWSqAOfrqE55gH0l850y8VY1epCm90ht_b02xzp4pn40TO1-s7g7tHPwCvdbkX-XdXAHYJPS1kKztkZdi3tRFDbYnKg9AyEZp73Valez9QU9UBn24L4CR_RT9DiLRvSSiZSwPV_AoMgHf_DekgXXpndFtWnro6qLgF3IEs9q4cy9NuAuOg-RxWPHE2B8ESPLfpDQAFaaI_W6brXuxMnwMQ9WnManU_q59vZxsC-GWGk4Vxp5D96sU2o5wst-Bs95g14-5SweyRZk6bGzEdEFEQU12URB0m1Ksnu86PXv6KYvSLsMe5hXDXPaT6nHPd7WwgPCRrAWW7iPYATXaD0BfNZOTu48REgHZ_BT_9AmGisL15n9ImzFSeLVSWL5z3SLo2QVJ1mSrNNsGWfLDU-TVZos16s0TlfJYr3Oknix4uuRXoPsN7Bf38fCkzJvKvyr7ALg1_hdskOZg13Q2p9WbU0ozV7RTi5uaCVHqAD7REN4VIoTGaRIz85NYX1hCsmFLfBzY0h-V8HRsj2znsvz_eNvyg_Gm5Px1fJHp2VzsUTpxRJl50uUvoUN8JHISUxPtBHF4I5-wjEkSYg-4NI_HbLe6cRjRP7smiTJuYO62K7F-XZlb6r76_w6yC8xfJZfNjpF6Xhd0meOXMLH2K8duuzs1L1i6R3k91Tp1uMkXTLWJTvXxdt27GmSsSqkfPcwkH3WgyTZxflYXZyP9fn5WL3JkX7mIk5-fBFfbMPm3ILXbyvd6ww7yO-b8Hq0k6vxti9-3XVMw5DkPA7JXnI3y1dZPTsNsocTo_5ETPM7AUGl2rZeui7AQ6VXN8_EmBhhlxYC2KgxFNJiRA1pEecQMYKpc0QIcXmUN-3wtMsXpwilFJDYYI7UYUUiOM9ILBfUrLrhahmaddfk1FJ2FYldKGQcZFRWgiCaYm0vIycfPC5WlB-AHSQXDagKzzqs-ik_ULcKv4dA0Cq_j7Z5HpUt1kdOsHz7NIJtIbWpc-DaRKahmdsyIUG3diup9QobTNojJw6yiB6CQhNIcgHhF5D0ApJdQBYXkPU55AKwIUBeNKFVxUPoNAFQNWfrgLmefqhbWrCP__nr7vPfd7ef_vv5jyCysV4-RHYdRjYP
zWPjDfVaMPsqyvdSNJFwUQV5L8IL5cRWy0jDltX5Y-QAo4j2srXKeZUHFE-N8VhIwq4MWyc1P1ROhC6u0ozfSE9k5cNjbdwlO7CDiuQvl1nX2RH1Uj30KohFGq3TxWpJC1RqsQuIBhLgKOVb5aNvLayOR8Fp78smWZaVDhKASQhbHdbhgTdU4YAupFSIF_pORpgEgu12kwjSUaM-GZ02u7AGBAurGUljTyAn9TDQCuwbLkkE7IUtZB057aKttmeQ2sBeQn5-BgZvsQ2oW2K6z8ki1Dqs1DcXqk8fQSxC026ZhVZ2DQ-tjkrhiHrVnY6qCeesou0BhWsZjKo2eFIi4aNQ6Yj8UeWye1LLXaQqyJgVWIfrgE85HEQy0SAMZNtYoQISplI5YTWi7prIHg-h2_p94NxIW1aBObp5ldM-grvQMvdgOI2s0aH0z0MdJ7KiLkIbhnmT9y2KSDZuC9IRZkgLHukhmY7i6fDsKZ8O0-kwmw4X0-F6MpxS4lO2fMqWnyFP2fIpWz5ly5fT4Wo6nAo1JZxOhZqymXKZMpnyoANqK9ort6Vf8AnU7kXfEmenzTGqwNfgBQkOBu5EuoaHZ41QNsAHkPP2H2uiE16VhaYbSXQifVd13UZ2TSRl2XVzEBgu2ZxOhHNbsiCASFGpehe5YGwBJSIJadyBD-FUYdt7COqDHwJTrobxoCoNgqweLkjQaqfNtvMLXm9hxehq8kER36jCggNNxgM-HhBZb0UuD-t5Fk6R7665VnQnHB0QHh6rKll7oaMnaQ2q1zjZFqbzc4jVGFBVBE_fanpfQLOtzBV4eLiiIvdPKyxAjPH9UwdwYSNV59GBXB7Ikoiuw_tO2neyvrPoO8u-s-o7667TNSTIYdNT3fRUNz3VTWjQj0TH8oFEeyDln_IqNE9dg4HB0djvwkLwVPTAXUNB0Kv127-Ml8zvhafXG_DwVLtbxqzsa4F74XqEIVJDhCGcm7MPJVwHzGBxXj0JnOUYrCoTTaOVRKG6dyqQqojawf1WAU3zXjN-uWb8bDW2gtAG7JYi0UnYXFopoXGPdX4Koo9K666of4Q7Hq5B_fi21dtcNBhaD3Xcgc1FRZfY3E-q0q8XeX-1KDsvLa0zrsTTKOcZoWDFFlG8UJpecVH2cUpw8LjgIqD5zR1gz48QGc2H1yYB8_l1Afaj9y1DhfoHtdyhQn1eQSVyv15DHYrT-JIoJPZ97j_Om8MTygZ_kPHz85z6t4t45xVkXGZzoKeQRYHn9wehW_ns1hA2ApILZQKZH2h0mbW_SRHjcxeOXqj3hvWMNysvjGteq7N9gJX_xY0YLHmc_neknq0BvPSW6VkHBmFLgQk_w8imhbwNw-eRW-udWa7xsvnXh3k2lDfozdPkrdhLNYvf98TvNY73Gsd7jeO9xvFe43ivcbzXON5rHO81Dv57NY5pqJaMXi_9q-DyZ4onEPE1wsLFCvMtlTgorfQGiyU_Cj6XMX78qhzxhAT5KC3OFGMak4qM8nAe8RQABoQR9Q6_W1U1pPJwUwEB08zZbR0-yAvf2c6o8ILf4tbOg759iUYWgRNQxUAeaQOCDR_g4iRxMKpADqgTWBEsTTX5gva27Ag7_LqK4eaQQI4V0pFTCLxlTZ_n3rJGS-Ek20omioNyIEGEX_GGj2vhB4wSMJ2kT9jqIkgI2M5oCK9m6Rf2wbESVhlYhE9-vdSUg6C-M75yzByBWVfxiaxH0t8gTmIQIHsUDqxFt_n3R7SaruwVNKiNZ3vRwFVFKhtc6YvVd4_V1hBDWHr6xBjmQggn6YBSPgOkHPibvYfovsLUD294o8Eki_lVcZ0Wm3QjrrzyWl6HXcpGH0hjIjRwRFpkEhKzB7-nT6nz1uJdGgwE1rnBfT8lH8MBuGqtvt5732ByNeNf4G8HNNrtHJYHBlRfCw1cyuabRF_6RTnXokP8slhmcXq1vy6FXC_W2-Umz_JyAyFclsplmUm5yTJerLIrylfd9WxxQ98l
7-ibZMwu-UirAQb3Rdgd4Tv44tOVuuYx5_EqXsbAlHNwF9mm4Mkqj2Wcr3gJp0hWQuk5nSVjd1f2mgQHfg6PGAS27vRQOLI-SUIBfQxUjL2GcMPKo_1-RUpek4b_B2a7o4E">