<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/129843">129843</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            LLVM 20 miscompiles `@llvm.ctpop.i128` for `aarch64_be`
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            backend:AArch64,
            regression,
            miscompilation
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          alexrp
      </td>
    </tr>
</table>

<pre>
    Consider this Zig program:

```zig
pub fn main() void {
    var x: u128 = 0b11111111000110001100010000100001000011000011100101010001;
    _ = &x;
    @import("std").process.exit(@popCount(x));
}
```

Running it under `qemu-aarch64_be` exits with status `24` (the correct population count of `x`) when compiled with LLVM 19, but with status `0` when compiled with LLVM 20.

Isolating the `@llvm.ctpop.i128` call in a reduced LLVM IR reproducer:

```llvm
; ModuleID = 'BitcodeBuffer'
source_filename = "repro"
target datalayout = "E-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "aarch64_be-unknown-linux4.19.0-unknown"

@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@start.simplified_logic = internal unnamed_addr constant i1 false, align 1
@builtin.output_mode = internal unnamed_addr constant i2 -2, align 1

; Function Attrs: nosanitize_coverage nounwind skipprofile
define dso_local i32 @repro() #0 {
  %1 = alloca [16 x i8], align 16
  store i128 71803349708323153, ptr %1, align 16
  %2 = load i128, ptr %1, align 16
  %3 = call i128 @llvm.ctpop.i128(i128 %2)
  %4 = trunc i128 %3 to i8
  %5 = zext i8 %4 to i32
  ret i32 %5
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i128 @llvm.ctpop.i128(i128) #1

attributes #0 = { nosanitize_coverage nounwind skipprofile "frame-pointer"="all" "target-cpu"="generic" "target-features"="+enable-select-opt,+ete,+fp-armv8,+fuse-adrp-add,+fuse-aes,+neon,+trbe,+use-postra-scheduler,-addr-lsl-slow-14,-aes,-aggressive-fma,-alternate-sextload-cvt-f32-pattern,-altnzcv,-alu-lsl-fast,-am,-amvs,-arith-bcc-fusion,-arith-cbz-fusion,-ascend-store-address,-avoid-ldapur,-balance-fp-ops,-bf16,-brbe,-bti,-call-saved-x10,-call-saved-x11,-call-saved-x12,-call-saved-x13,-call-saved-x14,-call-saved-x15,-call-saved-x18,-call-saved-x8,-call-saved-x9,-ccdp,-ccidx,-ccpp,-chk,-clrbhb,-cmp-bcc-fusion,-cmpbr,-complxnum,-CONTEXTIDREL2,-cpa,-crc,-crypto,-cssc,-d128,-disable-latency-sched-heuristic,-disable-ldp,-disable-stp,-dit,-dotprod,-ecv,-el2vmsa,-el3,-exynos-cheap-as-move,-f32mm,-f64mm,-f8f16mm,-f8f32mm,-faminmax,-fgt,-fix-cortex-a53-835769,-flagm,-fmv,-force-32bit-jump-tables,-fp16fml,-fp8,-fp8dot2,-fp8dot4,-fp8fma,-fpac,-fprcvt,-fptoint,-fujitsu-monaka,-fullfp16,-fuse-address,-fuse-addsub-2reg-const1,-fuse-arith-logic,-fuse-crypto-eor,-fuse-csel,-fuse-literals,-gcs,-harden-sls-blr,-harden-sls-nocomdat,-harden-sls-retbr,-hbc,-hcx,-i8mm,-ite,-jsconv,-ldp-aligned-only,-lor,-ls64,-lse,-lse128,-lse2,-lsfe,-lsui,-lut,-mec,-mops,-mpam,-mte,-nmi,-no-bti-at-return-twice,-no-neg-immediates,-no-sve-fp-ld1r,-no-zcz-fp,-nv,-occmo,-outline-atomics,-pan,-pan-rwv,-pauth,-pauth-lr,-pcdphint,-perfmon,-pops,-predictable-select-expensive,-predres,-prfm-slc-target,-rand,-ras,-rasv2,-rcpc,-rcpc3,-rcpc-immo,-rdm,-reserve-lr-for-ra,-reserve-x1,-reserve-x10,-reserve-x11,-reserve-x12,-reserve-x13,-reserve-x14,-reserve-x15,-reserve-x18,-reserve-x2,-reserve-x20,-reserve-x21,-reserve-x22,-reserve-x23,-reserve-x24,-reserve-x25,-reserve-x26,-reserve-x27,-reserve-x28,-reserve-x3,-reserve-x4,-reserve-x5,-reserve-x6,-reserve-x7,-reserve-x9,-rme,-sb,-sel2,-sha2,-sha3,-slow-misaligned-128store,-slow-paired-128,-slow-strqro-store,-sm4,-sme,-sme2,-sme2p1,-sme2p2,-sme-b16b16,-sme-f16f16,-sme-f64f64,-sme-f8f16,-sme-f8f32,-sme-fa64,-sme-i16i64,-sme-lutv2,-sme-mop4,-sme-tmop,-spe,-spe-eef,-specres2,-specrestrict,-ssbs,-ssve-aes,-ssve-bitperm,-ssve-fp8dot2,-ssve-fp8dot4,-ssve-fp8fma,-store-pair-suppress,-stp-aligned-only,-strict-align,-sve,-sve2,-sve2-aes,-sve2-bitperm,-sve2-sha3,-sve2-sm4,-sve2p1,-sve2p2,-sve-aes,-sve-aes2,-sve-b16b16,-sve-bfscale,-sve-bitperm,-sve-f16f32mm,-tagged-globals,-the,-tlb-rmi,-tlbiw,-tme,-tpidr-el1,-tpidr-el2,-tpidr-el3,-tpidrro-el0,-tracev8.4,-uaops,-use-experimental-zeroing-pseudos,-use-fixed-over-scalable-if-equal-cost,-use-reciprocal-square-root,-v8.1a,-v8.2a,-v8.3a,-v8.4a,-v8.5a,-v8.6a,-v8.7a,-v8.8a,-v8.9a,-v8a,-v8r,-v9.1a,-v9.2a,-v9.3a,-v9.4a,-v9.5a,-v9.6a,-v9a,-vh,-wfxt,-xs,-zcm,-zcz,-zcz-fp-workaround,-zcz-gp" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.module.flags = !{}
```

Compiling this with `llc repro.ll -O0` with LLVM 19 and 20 yields this codegen diff:

```diff
--- repro.19.s  2025-03-05 08:29:31.485173087 +0100
+++ repro.20.s  2025-03-05 08:29:34.672295525 +0100
@@ -1,5 +1,5 @@
-       .text
        .file   "repro"
+       .text
        .globl  repro // -- Begin function repro
        .p2align        2
        .type   repro,@function
@@ -16,15 +16,16 @@
        mov     x8, xzr
        str     x8, [sp]
        ldr     x8, [sp, #8]
-       ldr     d1, [sp]
-                                        // implicit-def: $q0
-       fmov    d0, d1
+       ldr     d0, [sp]
+                                        // kill: def $q0 killed $d0
        mov     v0.d[1], x8
        rev64   v0.16b, v0.16b
        cnt     v0.16b, v0.16b
-       uaddlv  h0, v0.16b
-                                        // kill: def $q0 killed $h0
-       fmov    w0, s0
+       addv    b1, v0.16b
+                                        // implicit-def: $q0
+       fmov    s0, s1
+       rev32   v0.16b, v0.16b
+       mov     w0, v0.s[3]
        ldp     x29, x30, [sp, #16]             // 16-byte Folded Reload
        add     sp, sp, #32
        ret
```
</pre>

<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJysOV2P46qSv8b9gohsbCfOQz-ku09LRzp3r3T2arXalxaGcsIMBgawO92_fgXYjp2e2TsPG0WuD4oqqKIKg6lz4qwAHrP6KatfHujgL9o-UglXax5azT8en7VygoNF_iIc-h9xRsbqs6V9Vp6yPP73efp_inOWn8zQok6hngqVkSYjRzRqwVF2eMryE0IIjdSia1ae0FCQBmXlC8rbYvrleb565NvH9CwCXURekZWz1reoKSP768TLqlz0RlsfR0Gc5xkhGTnujNUMnNvBVcS2KjfaPOtBBeqakWP4Rx3Z4WU9vzTdvwelhDoj4dG78BeU7fMf0A-YUssu--qthWyfo3chZfAUHxgEEVIlrr-gv_76r3-g4piRZ9QOPjTm2zaS75KpP52W1Adj_hK1ZFUu5djvmDfa7ERBmtCTolb4r-EIkoEun9A_NB8k_Pky-ejwJDzTHJ6GrgObkUOWn5weLIO3TkhQtIdJklgwVgfH5SdP7Rk84tRTST90GHqS-QOHxQDYkEOelaeSxEcgiy0ZsH0VH1g0WXlqUpMo9ll5io9ALSJhfuWpIA1WZeqL_zNQr4Fcj8lbYeQy5lsk8KC-K_2usBRquFa74rjLZ15SEP5V3g5CeqF2n-L81lL2HRSPyoTyYBWVaFDBKfyNcm4R08p5qjwS-wqREEcqxVmhJilznlq_c6I3UnQC-JvUZ8F-S1-BOiod3FQW2_HpwZvBv_Waw2_pIwiTO2VpQbwOinmhFTp5b11IRqUdVcKLT3hjegRLz4CUHtS7UBy578IYq8PiyPITh04oQNzpN6kZlUiUJKTbtFZizmekzJecz0hdxPFSGTqgrH4q9uiKRJPVL6vx7aO089oCCsFHh6LJy7I6HvKmJGVRl0HYeBs1fumYkZpEM1JTHhX8O_EyijMqZbL3k_wiTWohdSgec8cqdvR2UAzN7SXyOkxpEqmjyCdcPRJN6hPaSxIFLPjkNlLXt0rz6-CEMYaViZTuLITQuA_FVhEywAZJPW0lxOJjwQ9WoR56bT8y0iitIE2AA5N09vAvZzwFcVoz1Hsr2sGDmyIbMu3w9NurJqRlZ2kP2Oi4aEP2lS8hWaXMSPADSbmMmRmWxjMosIJtBTqgfrDgFqmMPIEKE8cOJDCPtfEZeQ5sDwnpDKa2H5uJGhxgyq3BlPM1KygNlAKtEuZtO6kIAkY7byl27AKhoNqMPAcVFksnsZP6HRdV5EVFmJ7PFpwTI-Cup5EjY8L6MNSrD-sUs9HjLtRH6kPbJKU-2ZjQISrvqPOR7tNzTAas8BfcMoa7wQmtbjzWfq55joHiOGZWHDC41D9szVhyaoY4l5ZKqhjgzmBtokTbFfsIkx9w60UAYTliR0fg-FrkXzjFFw75wim_cKovnPoLp7nnfGEcI4Nxk6Dg14SYxLh8j0Da9tJGrDd3DmS9aaM3mO6NvKohuvz5n__xrz_--19_vvz9x19pMiZGlFmWwIfxOmLORQ5P9Qdz4eLalNSDYh9p8eALDFY4L9hGJA16Jp2fyBh5rn14nwgopLUBkoy9owmN7oTrh9IOswtQg6nDvR5j1LqS9HEW3b6akKYr9gu6NNNeqJ5Gj3XnaLYTV8y09XDFtC5xU9aHfXRxJ-k5derjaDptGeCStMLjb0NvcKxFcRF1pth3vUxoMwGuPbmh1YROedIZyhK0bEzjMD5UjogO34R3A-61ot-T-CBlMJJwt1njM-2GFhMLZxy3x-LWFPMl7tELL0UTg7Y3lgO5EFJ4sFRG_WcWwYVaDgo76XAr7R1HaaZ7Tv0d24JPK-3SRtsXFj0vmhQNEasX_uaYVtHFkhsctzDgWCv5EXlpjNLtqwRhAtPykw5Igt3UMsQMlkMcTQ_Rcj-le29SgemTadVHWaVD3mPqcdpVsH8XDKYWBWcs-h64oD6FW2ns
xlhEJC_sxPlkn7iL6znNRTPWx3zRg5dCAaZe9yL50lA1AWzfx4QO_rIgODnYMG4u05owYLs-5a-Z5mIscMH8emOAqwEV6vHcbmES7XrsJMNpiwksSxVP0E1gjH60zLAZljMSHBAnY3l0nwUHdgQsbUgLbOmaeS22VL4l71rJliy3ZLUl6y3ZbMitJrI1S7ZmyZ3w1izZmiVbs2S_JQ9bcjuoreKt3q3ardat0liQbB_D6mJZdyDjFNyFzjCaipt0L9ycRgVp4q64tBkqbOIvLOftD6vxTa6vEpiolGABmmLBZh5ui32bKlOgumLfrah91e2rhWrWTaEqLwS9SYliL26UHPy4iPXaLA2-1zHbnIEJYIBuQpkFR1a4t4LFVe9c6xIc59ehhLfCG7D9Qq8K-Iqu1vRUytNLR3ArdoMxc1l2_mstS-NI7EinRHXj5OERyDKogK8HFeglyJGYojQuYRmXsKwml9CFvYpWoDrHqISF3NiLsZx3Tk_PZ-D4LHU77Qv-Evt52WKbyqiXrXiPSFo53ghuMchiTZA1US6E1RhkTFlvKYOx2cXZDXQqdmFXCtXNih6UpxJ_gtVCnbFxMHC9yHTiGhw-gsVharE4ig7Dj4FKzHR6yQyCFpgwNpw7sPsxUAvYah1bx2ZX0AkhM1LOSDUj9YzsZ-QwI82MHCdkArGqj8dZ_XFWf5zVH2f1x1n9cVY_qYqbxHt3jSO9xml_sj6BzwmEneld2-_U6iEV-cA8m3jMiAex7XGnWB13_r_OYeh24iNFPIH18Y5mF16r3HSTUYQD9M_uoJ51b4RMF0PCLfdQUjIUD-E7KRH-Z35_5YSo4ojk6EOA5C71ZZrDGRTiouu-3iFFbn7CGE-Ki-POIURyUuO8xHmN8iYrT-SYlaey2FVNXRzKvDmgjDzlRR6HS57Sf9JA8l9rqHb7AyHHuib1WkOVZ1WOcMiTyJ-QyA6jQzsPVz9dBIbfLp450d0FVhjD1H4vH_JWojRClJHXjLwijNETnIVC3XweT7pW3QxJNwrTLx7td_7DBNuT4eesymcF67mEGlOk2UR0f5vPrK7XY4TxjIOunza0OW9XzKx-ciarX1a9JP8qEBBSNkkQ3wny4k7TLPBvf5Oj4k0XEx5zCEsIZaT6keLSTVPgoXIFQ-sgLPbzO_tBZFL9XUgZVHLoktrIAR4I_jNfjfmOZ_VTMV0sXZuVjIVxXwWJYh_eEWbsJsCUn5Xci8w-GSjnckTokv-0-Xdd9n_N65KvFM4efI_2XL7xIOU8NrbFZjAr__0qNDcdswGXDMwhsjCW5OeuuPWdnf4-O8Nl9VN5vxxNhFcSb7yvZX6_LIt9Vr_8zEnFHrcfHtCrlhw4-hukpnylmnIeYdK06Jtu2OaY-3U9e-CPJT-WR_oAj8WhKor6eCyqh8tjUTWwb_OcHlt-ZAdKgLISakpIW9ZdWTyIx1Cy8jKv80NZ1NWuKzpaNV3BqmNeFcCzKoeeCrmL1Vzb84NwboDHghybqnyQtAXp4ocWQqY75qw8nU7xrjreYj3HcpWujLRaWL1wLNZ76id2_fJgH4MZ3A5nl1W5FM67m2EvvITH6VsCWvqH3exX3xA6bUPj5iPGw2Dl48V748LGEGNyFv4ytDum-4y8xg8MCWBj9TcI75CvcdYuI6_TxMdH8r8BAAD__2C106k">