<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/107093">107093</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AArch64] Bad optimization of vector `and 1` + `neg` that should be `cmtst`
</td>
</tr>
<tr>
<th>Labels</th>
<td>
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Validark
</td>
</tr>
</table>
<pre>
[Zig godbolt link](https://zig.godbolt.org/z/5nGnxnbns)
```zig
fn cmtst(a: @Vector(16, u8), c: u8) @Vector(16, u8) {
if (@popCount(c) != 1) unreachable;
const b = @as(@Vector(16, u8), @splat(c));
return @select(u8, (a & b) == b, @as(@Vector(16, u8), @splat(0xff)), @as(@Vector(16, u8), @splat(0)));
}
export fn cmtst1(a: @Vector(16, u8)) @Vector(16, u8) {
return cmtst(a, 1);
}
export fn cmtst2(a: @Vector(16, u8)) @Vector(16, u8) {
return cmtst(a, 2);
}
```
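Compiled for Apple M3 (`cmtst1` is lowered to `movi` + `and` + `neg`, whereas `cmtst2` gets the expected `movi` + `cmtst`):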
```asm
cmtst1:
movi v1.16b, #1
and v0.16b, v0.16b, v1.16b
neg v0.16b, v0.16b
ret
cmtst2:
movi v1.16b, #2
cmtst v0.16b, v0.16b, v1.16b
ret
```
LLVM dump (`zig build-obj ./src/llvm_code.zig -O ReleaseFast -target aarch64-linux -mcpu apple_latest -femit-llvm-ir -fstrip`):
```llvm
; ModuleID = 'BitcodeBuffer'
source_filename = "llvm_code"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-musl"
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local <16 x i8> @cmtst1(<16 x i8> %0) local_unnamed_addr #0 {
%2 = and <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = sub nsw <16 x i8> zeroinitializer, %2
ret <16 x i8> %3
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
define dso_local <16 x i8> @cmtst2(<16 x i8> %0) local_unnamed_addr #0 {
%2 = and <16 x i8> %0, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
%3 = icmp ne <16 x i8> %2, zeroinitializer
%4 = sext <16 x i1> %3 to <16 x i8>
ret <16 x i8> %4
}
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "frame-pointer"="none" "target-cpu"="apple-latest" "target-features"="-a510,-a520,-a65,-a710,-a720,-a76,-a78,-a78c,-addr-lsl-fast,+aes,-aggressive-fma,+alternate-sextload-cvt-f32-pattern,+altnzcv,-alu-lsl-fast,+am,+amvs,+arith-bcc-fusion,+arith-cbz-fusion,-ascend-store-address,-b16b16,-balance-fp-ops,+bf16,-brbe,+bti,-call-saved-x10,-call-saved-x11,-call-saved-x12,-call-saved-x13,-call-saved-x14,-call-saved-x15,-call-saved-x18,-call-saved-x8,-call-saved-x9,+ccdp,+ccidx,+ccpp,-chk,-clrbhb,-cmp-bcc-fusion,+complxnum,+CONTEXTIDREL2,-cortex-r82,-cpa,+crc,+crypto,-cssc,-d128,+disable-latency-sched-heuristic,-disable-ldp,-disable-stp,+dit,+dotprod,+ecv,+el2vmsa,+el3,-enable-select-opt,-ete,-exynos-cheap-as-move,-f32mm,-f64mm,-faminmax,+fgt,-fix-cortex-a53-835769,+flagm,-fmv,-force-32bit-jump-tables,+fp16fml,-fp8,-fp8dot2,-fp8dot4,-fp8fma,+fp-armv8,-fpmr,+fptoint,+fullfp16,+fuse-address,-fuse-addsub-2reg-const1,-fuse-adrp-add,+fuse-aes,+fuse-arith-logic,+fuse-crypto-eor,+fuse-csel,+fuse-literals,-gcs,-harden-sls-blr,-harden-sls-nocomdat,-harden-sls-retbr,-hbc,+hcx,+i8mm,-ite,+jsconv,-ldp-aligned-only,+lor,-ls64,+lse,-lse128,+lse2,-lut,-mec,-mops,+mpam,-mte,+neon,-nmi,-no-bti-at-return-twice,-no-neg-immediates,-no-sve-fp-ld1r,-no-zcz-fp,+nv,-outline-atomics,+pan,+pan-rwv,+pauth,-pauth-lr,+perfmon,-predictable-select-expensive,+predres,-prfm-slc-target,-rand,+ras,-rasv2,+rcpc,-rcpc3,+rcpc-immo,+rdm,-reserve-x1,-reserve-x10,-reserve-x11,-reserve-x12,-reserve-x13,-reserve-x14,-reserve-x15,-reserve-x18,-reserve-x2,-reserve-x20,-reserve-x21,-reserve-x22,-reserve-x23,-reserve-x24,-reserve-x25,-reserve-x26,-reserve-x27,-reserve-x28,-reserve-x3,-reserve-x30,-reserve-x4,-reserve-x5,-reserve-x6,-reserve-x7,-reserve-x9,-rme,+sb,+sel2,+sha2,+sha3,-slow-misaligned-128store,-slow-paired-128,-slow-strqro-store,-sm4,-sme,-sme2,-sme2p1,-sme-f16f16,-sme-f64f64,-sme-f8f16,-sme-f8f32,-sme-fa64,-sme-i16i64,-sme-lutv2,-spe,-spe-eef,-specres2,+specrestrict,+ssbs,-ssve-fp8dot2,-ssve-fp8dot4,-ssve-fp8fma,+store-pair-suppress,-stp-aligned-only,-strict-align,-sve,-sve2,-sve2-aes,-sve2-bitperm,-sve2-sha3,-sve2-sm4,-sve2p1,-tagged-globals,-the,+tlb-rmi,-tlbiw,-tme,-tpidr-el1,-tpidr-el2,-tpidr-el3,-tpidrro-el0,+tracev8.4,-trbe,+uaops,-use-experimental-zeroing-pseudos,-use-postra-scheduler,-use-reciprocal-square-root,-use-scalar-inc-vl,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,-v8.7a,-v8.8a,-v8.9a,+v8a,-v8r,-v9.1a,-v9.2a,-v9.3a,-v9.4a,-v9.5a,-v9a,+vh,-wfxt,-xs,+zcm,+zcz,-zcz-fp-workaround,+zcz-gp" }
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzMWFtv7LYR_jXyC0FjRWm18oMffDkuAiQNkAZB0ReDokZansNbSEpe-9cXvEi7sp02Adq0hm3OfBzODGeGI4nUOT4qgNtif1_sH6_o5I_a3v5CBe-p_XbV6f41zP2Dj2jUfaeFR4Krb8X-sSDt0XvjiuquIE8FeXrj43WWudZ2DEhBnvbqL-qkOuUKclPsHovdXf7f7NLvGx8TMijEpHe-IC0tqjtU1LtfgHltC9KWTUEe0NQGJeQBsTAfud-SQsXhPqlFCCE-oIK0Rb0z2jzoSQUbLEqRsqgeURnoSVmg7Eg7AUV1sZhp5TzqUBAs6h11SdVv-VbUO2cEXUyE31WbBT9ZFUVAAAsyYd1D8I6igjSoi15Vj8FYl9X9AYu70zBko3987bLw7HBxeLzMGZyMth4tiSr_baZ-Z3pyWM7ZJw8xJb_TD_Lf9IN87sdSvZduPWhpuIAeDdqiO2MEoB-qcDw-K3vqZEJyKBcxJPXMgzNzeV02qQRIVZ6dDD9U9XGcd4vMBZXWJXkF4-eCG3UW_KWPOairR_knOrbximwF4rrf49XZ3mdh_P77X35A_SRNPLOxQaBu4qLHuvuKrgvy5CwryJMQs3xmuofrIIF_RD-BAOrgiTqPsKd2BI8otezY1FhwNZ0QlsxMiIbUPAvqIQgOILnHQRnmFuHBectNcCnk_fPcBeEMVffoB91PAr57TA2CHO65D17dT8MAtiCHJOn0ZBk8D1yAohKyMFk3UZAczex4Tz0V9FVPfhEFLIvqDjBvi-ou_FUE87Ipqrv4L3BNXVR3TY15SYJASVqsKpKwvwXsnZGwVbH6ssRqUt-UflEpZlhOTqzrzrt-mhTzXCt0570NDwEkJ-eN1aMF55DSgwVASltgk3WBcq-KIaUn9cJVj164EPm8SZDavhakVVpB7MQvPnbhaKyHgStAvdPPQjMqUFE9lA06oRCHL-Fgr83o3QzZhySiuOp5UiHs_TPtextqd3d59AuyJzEK4WB9puUhoLwNXekB_U_G6suFs1V01k0dUu7lncNvYDVX3HMq-FsowIe4vWW1Bf9xh9Wnbfb_Mc3kT0kzyWH_88dP0syZNEjBR4fjqvfpPq-uU5HA6SLh5ZJw5PVW4b-sj_rT-qDeW95NHlyOdGgjh_v_fIWE5jRYKgEbzZUPRU3COxIhSZAEgdTSMDPTOhsbPU6Nfis1APWTBbeKYrovQwFguidpbPZxOGT4kOFDk4Y2DyyOfW-xcAIPNFh6KMg9DcofMB1jGPgMeJA0TwkPVlEPOCRHaNpjNns8VAQb6sPcKqfe2BzViOm9frmMs8uU5f6IO8bwMDmu1SXKurcziqljoHrsvLYQfQcXne3KposvR7ijgioGeDBYm6y_G_Kc7SAjngeAUSGwozP0-JSitUHKDwj5gFQfkPoDsv-AtO-RD8BNcpSx3iwU708LaUyUP36Lg7DdsYuUNB_CyLQ04qSmHPWHH__685e___zd409fvk-70dbDCds2cSanmoU3lUS8Gq_jnHOxZvr4PA5zPXehymOhKvaKHTtCj48wWe48T8KLSNzHyjpvFhW5LHodjl6fGIjFEwhBZunowsRog0oq4ocI1sZH0EMcTq9KO8yOQA2mDks9R3yoiJSRaOpMUMmVpDmkwxi1DPy0BITuK9xW-0OTMzEIOqaFMhb2oC0DXJGOe_x1kgbHA58rbjBlM0gR5Uybh157cibrTK6HazCYWjlnaWkX1IfOkZlJiKB64dzmECy8mzpMLIw4fv-VF1PWhPnL1avDkYsnTuiRsws0FQAGbS9BB-KCFdyDpSK6MbI4HKntQWEnHO6EfYcozbTsqX8HW_BdEu2yB0eWE8TblDbu8xH-6phWMROiN5gKPirosVbiNc2L6C8Wrqkz4CABsNavcBAzIqboiYRYsnJtHNLE
ZoXlYlRB6kNKxv6hNO48x9Tj9BzA_oUzyDMKRsylhJ6HNp5BN8fWJPrSZuSNveEhH4a0Hz15wRVg6rXkLLtiqFoJbF_mhZn8MayJBBY5RQbsIJOnxkLPmb88MXAyoEJjz8IWepscNHaQ2AmWv0ECZKnKBWOpS4CbSUaYiQELY3WGwq51ZvsYPwsO7Az4VG653ZZ9N0u2bLVl6y2737Ltht1qIluzZGuWvBPemiVbs2RrljRb9rBlt05tFVdbp7Zmtla2RrY2biInc2pdl0cQOWXuSM9U9MAJ_YIld8sJKkkbH6_rnKHcJnyFnLe_Wo3PcrJOQ-bSqQqjKTOFh7LJj-HINfXQ1CvXXk61Q0VWhp6leNnwMycmPycxA3nAAEMmmQW3bDRx3nKW26hzXSxll07j2pgv-PqSX1t0eu8IAcFuMmZpvM5_aEA42Utw5NNzyM05NjOQ3H0T3XFvwMqVX9MTmRzfeQmop-MIPR6F7nLX9ceccy86bFN78qLjL5FIifGG9xaDKC8ZcslUK2M1BrHLKi1lMLfX0Qm_vkRNNDVKHJ4AoalYLkF5KnB6tR-xcTD1epUx2nlL01vCJOIXXsQtMG5s-ArC7teJWsBWa7_MOkYFtZgrhuf8zJnb65KuJDmT1Zmsz-T-TDaBxHN7fViIdiFuVqmMRP_mm2QqEGQhqoWoF2KfiUVHbMovwynu4pR7-BuTC_EW8NT68Yu236jV09JoAzya-Nr_4dbuqr-t-pvqhl7BbXkg-6qpyK66Ot7uDk3dHPq-bJqOspsdLW8Ayq6uK7KHqqyu-C3ZkXp3s6vKsqpIdT2UTU0P-7bd9y30Q1PUO5CUi2shZnmt7XjFnZvgttwddjfVlaAdCLdct9vbeO_UTaMr6p3gzrvzOs-9iBfzd-leptg_onvaI208l_yNxs9yPaA53muiotmFz9qyaHaoIPeBVzAGzh-pR-6oJ9GjDsJEuuFsdleTFbfbW_yR--PUXTMt8xVbHrCx-mu8sn6KG3IFecp7mm_JPwMAAP__Npoikg">