<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/107088">107088</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AArch64] Missed folding of `and`+`cmeq` -> `cmtst`
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Validark
</td>
</tr>
</table>
<pre>
[Zig godbolt](https://zig.godbolt.org/z/6dh4P1b5d)
```zig
/// Bitwise select over 16 byte lanes: each result bit is taken from `a` where the
/// corresponding bit of `selector` is 1, and from `b` where it is 0.
fn bsl(selector: @Vector(16, u8), a: @Vector(16, u8), b: @Vector(16, u8)) @Vector(16, u8) {
return (a & selector) | (b & ~selector);
}
/// Per-lane bit test: returns 0xff in every lane of `a` where `(a & c) == c`, and 0 elsewhere.
/// `c` is required to have exactly one bit set; under that precondition `(a & c) == c`
/// is the same as `(a & c) != 0`, which is the test the issue expects `cmtst` to perform.
fn cmtest(a: @Vector(16, u8), c: u8) @Vector(16, u8) {
// Precondition: exactly one bit of `c` is set. Any other value reaches `unreachable`.
if (@popCount(c) != 1) unreachable;
// Broadcast the single-bit mask to all 16 lanes.
const b = @as(@Vector(16, u8), @splat(c));
// Lane-wise select: all-ones where the masked value equals the mask, all-zeros otherwise.
return @select(u8, (a & b) == b, @as(@Vector(16, u8), @splat(0xff)), @as(@Vector(16, u8), @splat(0)));
}
/// Reproducer entry point: adds 14 (wrapping) to every lane of `ret`, then for each lane
/// returns the sum if bit 1 (value 2) of the sum is set, and the original `ret` lane otherwise.
export fn choose(ret: @Vector(16, u8)) @Vector(16, u8) {
// `+%` is wrapping addition, so lanes overflow modulo 256 instead of trapping.
const shifted_left: @Vector(16, u8) = ret +% @as(@Vector(16, u8), @splat(14));
// The mask from cmtest is 0xff or 0x00 per lane, so bsl picks whole lanes here.
return bsl(cmtest(shifted_left, 2), shifted_left, ret);
}
```
Compiled for Apple M3:
```asm
choose:
movi v1.16b, #14
add v1.16b, v0.16b, v1.16b
movi v2.16b, #2
and v2.16b, v1.16b, v2.16b
cmeq v2.16b, v2.16b, #0
bif v0.16b, v1.16b, v2.16b
ret
```
I think it should be:
```asm
choose:
movi v1.16b, #14
add v1.16b, v0.16b, v1.16b
movi v2.16b, #2
cmtst v2.16b, v1.16b, v2.16b
bit v0.16b, v1.16b, v2.16b
ret
```
Here is the LLVM IR dump (`zig build-obj ./src/llvm_code.zig -O ReleaseFast -target aarch64-linux -mcpu apple_latest -femit-llvm-ir -fstrip`):
```llvm
; ModuleID = 'BitcodeBuffer'
source_filename = "llvm_code"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-musl"
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local <16 x i8> @choose(<16 x i8> %0) local_unnamed_addr #0 {
%2 = add <16 x i8> %0, <i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14>
%3 = and <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
%.not.i.not = icmp eq <16 x i8> %3, zeroinitializer
%4 = select <16 x i1> %.not.i.not, <16 x i8> %0, <16 x i8> %2
ret <16 x i8> %4
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="none" "target-cpu"="apple-latest" "target-features"="-a510,-a520,-a65,-a710,-a720,-a76,-a78,-a78c,-addr-lsl-fast,+aes,-aggressive-fma,+alternate-sextload-cvt-f32-pattern,+altnzcv,-alu-lsl-fast,+am,+amvs,+arith-bcc-fusion,+arith-cbz-fusion,-ascend-store-address,-b16b16,-balance-fp-ops,+bf16,-brbe,+bti,-call-saved-x10,-call-saved-x11,-call-saved-x12,-call-saved-x13,-call-saved-x14,-call-saved-x15,-call-saved-x18,-call-saved-x8,-call-saved-x9,+ccdp,+ccidx,+ccpp,-chk,-clrbhb,-cmp-bcc-fusion,+complxnum,+CONTEXTIDREL2,-cortex-r82,-cpa,+crc,+crypto,-cssc,-d128,+disable-latency-sched-heuristic,-disable-ldp,-disable-stp,+dit,+dotprod,+ecv,+el2vmsa,+el3,-enable-select-opt,-ete,-exynos-cheap-as-move,-f32mm,-f64mm,-faminmax,+fgt,-fix-cortex-a53-835769,+flagm,-fmv,-force-32bit-jump-tables,+fp16fml,-fp8,-fp8dot2,-fp8dot4,-fp8fma,+fp-armv8,-fpmr,+fptoint,+fullfp16,+fuse-address,-fuse-addsub-2reg-const1,-fuse-adrp-add,+fuse-aes,+fuse-arith-logic,+fuse-crypto-eor,+fuse-csel,+fuse-literals,-gcs,-harden-sls-blr,-harden-sls-nocomdat,-harden-sls-retbr,-hbc,+hcx,+i8mm,-ite,+jsconv,-ldp-aligned-only,+lor,-ls64,+lse,-lse128,+lse2,-lut,-mec,-mops,+mpam,-mte,+neon,-nmi,-no-bti-at-return-twice,-no-neg-immediates,-no-sve-fp-ld1r,-no-zcz-fp,+nv,-outline-atomics,+pan,+pan-rwv,+pauth,-pauth-lr,+perfmon,-predictable-select-expensive,+predres,-prfm-slc-target,-rand,+ras,-rasv2,+rcpc,-rcpc3,+rcpc-immo,+rdm,-reserve-x1,-reserve-x10,-reserve-x11,-reserve-x12,-reserve-x13,-reserve-x14,-reserve-x15,-reserve-x18,-reserve-x2,-reserve-x20,-reserve-x21,-reserve-x22,-reserve-x23,-reserve-x24,-reserve-x25,-reserve-x26,-reserve-x27,-reserve-x28,-reserve-x3,-reserve-x30,-reserve-x4,-reserve-x5,-reserve-x6,-reserve-x7,-reserve-x9,-rme,+sb,+sel2,+sha2,+sha3,-slow-misaligned-128store,-slow-paired-128,-slow-strqro-store,-sm4,-sme,-sme2,-sme2p1,-sme-f16f16,-sme-f64f64,-sme-f8f16,-sme-f8f32,-sme-fa64,-sme-i16i64,-sme-lutv2,-spe,-spe-eef,-specres2,+specrestrict,+ssbs,-ssve-fp8dot2,-ssve-fp8dot4,-ssve-fp8fma,+store-pair-suppress,-stp-aligned-only,-strict-align,-sve,-sve2,-sve2-aes,-sve2-bitperm,-sve2-sha3,-sve2-sm4,-sve2p1,-tagged-globals,-the,+tlb-rmi,-tlbiw,-tme,-tpidr-el1,-tpidr-el2,-tpidr-el3,-tpidrro-el0,+tracev8.4,-trbe,+uaops,-use-experimental-zeroing-pseudos,-use-postra-scheduler,-use-reciprocal-square-root,-use-scalar-inc-vl,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,-v8.7a,-v8.8a,-v8.9a,+v8a,-v8r,-v9.1a,-v9.2a,-v9.3a,-v9.4a,-v9.5a,-v9a,+vh,-wfxt,-xs,+zcm,+zcz,-zcz-fp-workaround,+zcz-gp" }
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzUWU1v4zjS_jXKhaAhUbKsHHJInA7eAabfWcwOBou9NCiqZLObItkkpTg57G9f8EOy5STzAQwWu42OWfVUsapYZBVlmVrLDxLgLts-ZNvHGzq6ozJ3v1LBO2q-3bSqe_Gyf_IDOqiuVcJl28eMNEfntM3K-4w8ZeTplR82SbxR5uCRjDzV3bH6W9Fuu4zcZvljlt-nzzqP_1_5ISK9RK0VGWksCGBOmay8R1mV_xoZ0hR1RvZobLwlskf0d-Ttb8pvPxKhbPcQA0IIIQNuNBJlpKEoIzVaYguKey9og-BfF5KsTBay3ePlmnuJ2ODAOm_ud6JnXp4C-iOB8t7HklW5VnqvRul9sKBFiqx8RIWnR2mAsiNtBSwx-slMSetQi7xiVuXURlMfxZZVudWCzi4uV3yZsyqPOclI46fuz1lsQ2Dlo_fXJot_wml-6vvk98_PnSd-uEtw0so45DfrqJSFjDQG3F9xmGKa7ZH3DrovAvrfsho2w4BDGXnIyPZPLrOoPt6YWGXLSVzFQ_aIJGvXuE_Cu1mbK_kyiXs1aC6gQ70y6F5rAehz6VvFey2A2iEiKeGzGkr_BjVxP07FpqjjeSFlUSUl2nVoLZ3yhYrY-9bIhTUyG5PJGLky4Sny1hgb4PuV_qXZlBPU8j6avY7sA7M-1x8n9wfkjlx-Q9wfJjWKDrXwP5DbJWXOuj-c4Ja7P5a5307Z_4EBxC1yR0A__vjrZ9SNgw4dM9xAqB256LBqv6JNRp6sYRl5EmIavjDVwcZr4J_QzyCAWnii1iHsqDmAQ5QadqwrLLgcTwgPTI-I-vP-RVBfYAj3MHCHvTHMDcK9dYZrH94HF6LXTFD5gD6rbhTww2PszWT3wJ0P6WHsezAZ2UVNq0bD4EvPBUg6QFImywoykjYhRd1RRwV9UaObVQEPWXkPmDdZee__SoJ5UWflffjwXF1l5X1dYV4Qr1CQBsuSROzvHrty4tcplljmRI3ym1TPMiYMD6NvRmSViPIBPY2SOa4kunfO-EcMNIzWaaMOBqxFUvUGAEllgI3Gesq-SIakGuUzlx165kKkbjfAoMxLRhqpJIRL8NmFCzA466DnElBn1RehGBUoK_dFjU7I5-GT76jLJXAlIVu_gyjM-jJKn_buC-06E-r-svFnZEtCFnw1vWdl71HeIN-09-i_gSg_XQRfxuDlO8GTc_Akzf7Pj6tgN1K5DfefIWrOBo3g-9vISz_9FYzikjtOBX8Fk8xkZFuFyfHx5Ty3SHPPPtLy39_R61SlIMOdfi2t3n0Uoc4Z3o4ObDpUvpR2D399MfgC7Q0dAGvFpfOdhfjnM0KiIvEKsawx0-MiDZ0Ox0631uqButGAXVQx3RY-M5huSRzrbRh2Cd4leFfHoUkDC2PXGSyswD31nvYZeaDe-B7TQ0gDnwD3A00i4cBI6gBbODmhaIfZ5HBfEqyp87JFT76yKZgR47X9YR4nmyjD3RG3jOF-tFzJS5S1r2cUU8tAdtg6ZSDEDjYE2xZ1Gx7acEsFlQxwr7HSyX7bJ5lpISGOe4BRIbClE3T4FLO1Qoo3CHmDlG-Q6g2yfYM018gb4DYGylinZ4p3p5nUOugfv4VBmPbYBmrQb9LI1KDFSY4p6_uf_v-XT__45YfHnz_9GFejjIMTNk3kdNpq5q_qSLxop4LM2nBmunAneVnHrT_l4aBK9oItO0KHjzAabh2PyrNKWMfCWqdnE-lYdMqXXhcZCIfHE4JMg6UzE7INMpoIXQQr7QLoIAynF6ksZkegGlOLBzUFvC_JMASirhJBBy4HmlLaH4KVnp_mhNBtiZtyu6vTTvSCHuLEIRzsXhkGuCQtd_jrOGgcCj6duF4XdT-IoKebNHTKkTNZJXIprl5jaoYpaQ9mRp3vHIkZhfCmZ86uimDm7dhiYuCAw_ei
4kJktJdfzl4CDlyoOKEOnF2g8QBgUOYStCAuWMEdGCpCGAcWhiM1HUhshcWtMFeIVEwNHXVXsAHXRtU2RXBkaYN4E7eNu1TCXy1TMuyE6DSmgh8kdFhJ8RLlIsSLha2rBFiIACznV1gIOyLGEMkA4cgOS-MYdGhWeJidSoh9SA6hf0iFW8cxdTjeA9g9cwZJIuGA-TBAx30bT6CdQmsSXWES8specZ-KIa5HjU5wCZg6NXCWQtFULgQ2z9PMjO7o5wQCi7RFGkw_xEi1gY4zd1kxcNIgfWNPygY6EwPUph-wFSw9hHvIUJkOjKE2AnYiCWE6JMyP5Rnyq1aJ7UL-DFgwE-BTsebyNXslJWu2XLPVmt2u2WbFri2RtVuydkuulNduydotWbsl9Zrdrdl1UGvD5TqotZu1l7WTtY_bwA1pa22bRhBpy-yRnqkQgRXqGQ_czhVUkCZcr4tMU24ivkDWme9G4bPeUMUhcbGq_KiLROG-qNM1HLi66utq4ZpLUdOXZGHoWYsXNT9zYnRTVNOQBgzQJ5IZsPNCI-cMZ6mNWtuGo2xjNS6N-YKvLvmlRcfnDp8QbEet58Zr3ZsGhKO_CAc-3kN2SrmZgKTuG-mWOw1mWPhlewKT8jvNCXX0cIAOH4RqU9d1x7TnTrTYxPbkRMufAxE3xmneGQyiuGTIJVMujFEYRJ5MGspgajYhCLc8RI00NkrsbwDfVAwfQDoqcHz-P2BtYezUoqOVdYbGp4RRgJlxA4xr47_wYft9pAawUeE7QJBaRgU1mEuGp3TnTM2moAtJzmR5JqszuT2TtSfx1Gx2M9HMxO2ilZAQ33QbXXmCzEQ5E9VMbBMx2whN-bk_hVWcUg9_ZcNMvHo8tn78rMw3atQ4N1oPH3R47H_zLvCmuyu72_KW3sBdsSPbsibbfHdzvGtozXZQ92VV7yil7a7rclqzsii3dHebdzf8juSkym_zsihIUVWbnDZ1v93uWnJ7y2hfZVUOA-ViI8Q0bJQ53HBrR7gr8l3eNDeCtiBs-C2DEAnPKEj915Dt4425C-9h2vFgsyoX3Dp7NuO4E-FHkPv4qiLbPqLP3NrwFlN0XB6Q6lFW5_6qqfOMPGR1zgYN37M6Rzh8k_OAs85nYDTibv0LyYG749humBrS26U0YG3U1_Ci_CkEazPylFYz3ZF_BwAA__-yymjz">