<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/127789">127789</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[AVX-512] Vector-NOT + masked `vpmovwb` loses merge-mask because backend flips order
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Validark
</td>
</tr>
</table>
<pre>
[Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:29,endLineNumber:1,positionColumn:29,positionLineNumber:1,selectionStartColumn:25,selectionStartLineNumber:1,startColumn:25,startLineNumber:1),source:'const+DEFAULT_VERSION+%3D+true%3B%0A%0Aexport+fn+foo(matched:+u32,+unused:+@Vector(64,+u8),+kw_table_indices:+@Vector(64,+u8))+@Vector(32,+u8)+%7B%0A++++_+%3D+unused%3B%0A++++const+keyword_kinds+%3D+@select(%0A++++++++u8,%0A++++++++@as(@Vector(32,+bool),+@bitCast(matched)),%0A++++++++@as(@Vector(32,+u8),+@truncate(@as(@Vector(32,+u16),+@bitCast(vpnot(kw_table_indices))))),%0A++++++++@as(@Vector(32,+u8),+@splat(252)),%0A++++)%3B%0A++++return+keyword_kinds%3B%0A%7D%0A%0Afn+vpnot(a_:+anytype)+@TypeOf(a_)+%7B%0A++++if+(DEFAULT_VERSION)+return+~a_%3B%0A++++const+a:+@Vector(8,+u64)+%3D+@bitCast(a_)%3B%0A++++return+@bitCast(vpternlog(a,+a,+a,+51))%3B%0A%7D%0A%0Afn+vpternlog(vec_1:+@Vector(8,+u64),+vec_2:+@Vector(8,+u64),+vec_3:+@Vector(8,+u64),+comptime+i:+i32)+@Vector(8,+u64)+%7B%0A++++var+result+%3D+vec_1%3B%0A++++asm+volatile+(%0A++++++++%5C%5C+vpternlogq+%25%5Bimm%5D,+%25%5Bvec_3%5D,+%25%5Bvec_2%5D,+%25%5Bvec_1%5D%0A++++++++:+%5Bvec_1%5D+%22%2Bx%22+(result)%0A++++++++:+%5Bvec_2%5D+%22x%22+(vec_2),+%5Bvec_3%5D+%22x%22+(vec_3),+%5Bimm%5D+%22n%22+(i),%0A++++++++:%0A++++)%3B%0A++++return+result%3B%0A%7D%0A'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:50,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:ztrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:2,lang:zig,libs:!(),options:'-O+ReleaseFast+-target+x86_64-linux+-mcpu%3Dznver5',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source
:1),l:'5',n:'0',o:'+zig+trunk+(Editor+%231)',t:'0')),header:(),k:50,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)
This assembly:
```asm
vpmovwb ymm1, zmm1
vpternlogq ymm1, ymm1, ymm1, 15
vmovdqu8 ymm0 {k1}, ymm1
```
Should be:
```asm
vpternlogq zmm1, zmm1, zmm1, 51
vpmovwb ymm0 {k1}, zmm1
```
(I assume immediates 15 and 51 both compute a bitwise NOT here, since all three `vpternlogq` source operands are the same register?)
This is somewhat similar to https://github.com/llvm/llvm-project/issues/113400
Optimized LLVM IR:
```llvm
; ModuleID = 'BitcodeBuffer'
source_filename = "root"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux4.19.0-musl"
; Function Attrs: mustprogress nofree norecurse nosanitize_coverage nosync nounwind skipprofile willreturn memory(none) uwtable
define dso_local <32 x i8> @foo(i32 %0, <64 x i8> %1, <64 x i8> %2) local_unnamed_addr #0 {
%4 = bitcast i32 %0 to <32 x i1>
%5 = bitcast <64 x i8> %2 to <32 x i16>
%6 = trunc <32 x i16> %5 to <32 x i8>
%7 = xor <32 x i8> %6, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%8 = select <32 x i1> %4, <32 x i8> %7, <32 x i8> <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
ret <32 x i8> %8
}
attributes #0 = { mustprogress nofree norecurse nosanitize_coverage nosync nounwind skipprofile willreturn memory(none) uwtable "frame-pointer"="all" "target-cpu"="znver5" "target-features"="+64bit,+adx,+aes,+allow-light-256-bit,+avx,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vp2intersect,+avx512vpopcntdq,+avxvnni,+bmi,+bmi2,+branchfusion,+clflushopt,+clwb,+clzero,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fast-15bytenop,+fast-bextr,+fast-dpwssd,+fast-imm16,+fast-lzcnt,+fast-movbe,+fast-scalar-fsqrt,+fast-scalar-shift-masks,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,+fma,+fsgsbase,+fsrm,+fxsr,+gfni,+idivq-to-divl,+invpcid,+lzcnt,+macrofusion,+mmx,+movbe,+movdir64b,+movdiri,+mwaitx,+nopl,+pclmul,+pku,+popcnt,+prefetchi,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sbb-dep-breaking,+sha,+shstk,+slow-shld,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+vzeroupper,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-16bit-mode,-32bit-mode,-amx-bf16,-amx-complex,-amx-fp16,-amx-int8,-amx-tile,-avx10.1-256,-avx10.1-512,-avx512fp16,-avxifma,-avxneconvert,-avxvnniint16,-avxvnniint8,-branch-hint,-ccmp,-cf,-cldemote,-cmpccxadd,-egpr,-enqcmd,-ermsb,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,-fast-7bytenop,-fast-gather,-fast-hops,-fast-shld-rotate,-fast-variable-crosslane-shuffle,-fast-vector-shift-masks,-faster-shift-than-shuffle,-fma4,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,-inline-asm-use-gpr32,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,-ndd,-nf,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,-pad-short-functions,-pconfig,-ppx,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefer-no-gather,-prefer-no-scatter,-ptwrite,-push2pop2,-raoint,-retpoline,-retpoline-ext
ernal-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,-serialize,-seses,-sgx,-sha512,-slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,-sse-unaligned-mem,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-usermsr,-waitpkg,-widekl,-xop,-zu" }
!llvm.module.flags = !{}
```
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzsOktv4ziTv0a5CAwkys9DDrbTWQx2vsliur_G4rsElFiSOeZDTVKyk8P-9gUfsqXYne7tXexpBoFVb5aKVcUSp4kxrJEAD8l8m8wf70hn90o_fCWcUaIPd6Wir473L9ak_6ZoqbhN5o8JXu2tbU1SbBL8lOCnN9bcN459z62nFI3n5QleJXj1fWSfFJtKUfhEmVU6wTvmuauacZBEgMeWeYKXCd7VStrPFeGOms8i4Z8G_uMUxLIg9pdjJ3jHiXRrvbEmwTsDHCrLlAz2QdKd4p1wKF4neAeS_s4k_NGJEnQ00CrDnMpUcqBeiZ-X-GyJthel-RXvWvWGxi1Bt75Rna5iYColjU3w9vHT0-afv395-frpz8-_Pf-R4G2C58VjgrdWd-BgR8g2_gdOrdJOq5buR6kErwSx1R6ot7rtCpzgnQNkZwZiMsu-QuU3abWYRf4quJTg7eH4YknJ4YVJyiowP1Rav2Of11xFHp4vB6e357-X0atF70Yvd5EbAnOA16PS9OXAJDUj3WSWhS3xafhOefzn3Nl9KJHMMmKcmRsvUyrFzyFKZlnJ7I44x84Bj6H49SVGe5DMMqs7WRELQfj7Svnitlt9K5V7Xm1n9HP9f-2waTlxC-I5_sj0-vY2a7Cdltf7PMr35eMl833CD69IXkKOEvlqX1s4Z-SX1xae6yDwQSay2j9XV6W3Hvv1X87IRwlKrgtlFePkCmY9zdnLRkXnPgzKu521oCVXjVMOK0wf83zYgI-jdzHTQ_WS_9B_BztJ_NOSxU9JVkq0lglwexEUWIGv-8qNaN7YzZ5oHznTuQPsHPLwhjejTIxwAooTyziEXPi4JvB8vos_lzB-CxzX8ufzLRPCPR9jeZzpISrf4-DvcvLI-citEOv3Gt6QM4y3pwi6NxwitP6f2MQTm2Nzkbs--z591dsKxVThErMgLkfi7Gd6VbH5hYYzBOJWrSzDqjyc0fMwlcjJjKIC9i83m2zjiR78L-KwY8fysTEekmIzzy6mZx-YDog7h7Mb1na3BzHRMg6jMexMKTZv7mg5uKGLcQs6HPCrcrJ0ySTRr5OZLZCey7_cWTtmVEoIkPZZ8teJDQpl1-wI52YiTkEQ2XCYyjLtpqo-jhsDGU5QdXY6OzJpgU-kOCs10a87RadGrWZiotqDLpWBx-ABk837WNacNOYrg-NzC_JXJ1b8fmLlrDSjHXILqdaNkPFl0XOCt38CB2Lgyff4LbJEN-Cg02rxspghzmR3cgxRtZ1vam-yBx1TUvWgNaPwfpkPZuX8Z0fl_Bcm5WvWDwbl_Kfm5PynyzHBWx_7bcx110KGL5Nzca6_X597INS7cA7ldyrWJVieZb9Wu6OXwT9sALeUe9Am7K4_E7NNkm2-7JlJiTEgylCQnpossvDnTrts07dC9ccyHf57FcJtQfrmno49HGpT9vtnPnfCQvX0W7ca2crSZLk95IlrpFF65ELw6PNedZymJXzfx6kTb2MfR895fvuFpk683XQiwavfXLA6AWmp7D7N5ymRNJ3nKVUpSUtmj8xA-sfzl1TJYKR4Osf635mkJDVMME50alU6_ZhumN135X2lRIKfOO-HB2q18m0UPzFjOjeUP-V5McuiV89uHGJvQNPff__6j_S3P69D5K1lm6TYpv9QtOPw22OaFI9pgpdbZt2H-Lara3cCLJNsEyroZfgWj5JYKzc_4yTbhHaTUmIJJ6-qs4MIIJfhgFqXecWmwP7HofkUddBi5n8QOwM5XrkCwStUr7IISU9aDOqLGfrsxMZ-WM1afnYztsBOHqQ6ytAKZ_f5-j5DojM8aMZgPHXSN510Y60_2lLRGdtq1WgwJpWq1gCpVBqqThsHGSKZZW_wUrkuShpPe5VVKlUnj0zS1BxY22rlopceGedhdEgFCKVfE7ySSrqPjrQ7
-i-tJNtQqJmElBr1wlVFeJoUuwKnp5StkuJTmsyy8LHOCpy6UcMlaFLsFrOLCJ7nN6luyEq90ZdOus2kL4RSnSa48AmfZJvUyc189EpmK2JsOizkcvTsS54Unwbx-UT8etWp4mKkufCa_nP1nUSwO1ZcjfSWXu-k9Pvg4PkivjlbpchH4W_gGrhEcuUjGc7b6e76RIjBnIZ4eYMaAj4blvgbmAIx4BrsdThXrv0sH0MfItZqVnYWTKxK18aW2___TuSaZ62JANQqNzZr1yrd5zAm3LVNxw8dF_mpMjKHyXLMr4HYTruTKgoleLuYlczGmwd6ioAT8QDn6og4a_YW4fkCXUT70xnAZ2ie47LOFxOcWcKbCeU4xio6xui3MVaPEVYLMsb7UrD3-MSTnk8wKafiLfbBNP4EH9NVW0k7cuSieV7xslSpiaz2decHuHAPwmvemb1q7YAfywF6A60iLFQfIV0NV3HVaYhddYr3JNCD8yogdb6oIkSMRfm8fLUgVTuilXCyeoTT9mgMHRGYEMMiHudvlbQjXKi-hBFuKsKJRrX5pu012exZbZEg5mBGzJ5o5jIXtaA5kYDMvqtrPjbb-_ugidlhe2vTmJKYQdpoEaGTiW_W1MOOMMr6b8gqRNmw3Uz2bcXiK4_eTpBKq_FGCRFTePTKbghmejErx2hcSxwJs1FFqjYu11ZcdAN86CLgcyjCGmqw1Z4NaF3tYwVo2g6OatrqbgC1PFMNQIQN2cd6MGWJKLSo1EAO7vs3UPdkAIw9RNBVr9nzwcQQVGMAn6HiDM3u8xF8kZgNli_S_blF9CEC34aK6V2Sd20LcbOOpVRM9tGF02oZAUN6GIHVCD7XjsfcOihflMwlJ3U6qMBjjIgTim3Hw5USLYfTgNbthcWkXQ1wuCfcIdKf8uw-R2FiOaOh6lDsRION_hT7kAMlVMp1WRtx1ymYtGfRiPsVQ6NAe-bzAlWVaP2z9r-cglDWu1OJtqpOhLqAIWha7Z_yWyUCRQvjshPVhBtweWBQA1aQYHdE9bmPbKyAMUd0vLom8W_vaC340puQhsQeEzWRDQSa60r5pSsFyvI9oSF279MjoHvVmjPishVpZYm9mDz3k0orY951FDTuJ9N-5FkwUO2eyImeIG4uQHuiKUhkuEHsr7ArI5IG_7p7DSZAruXw2HL8RjDJmQREjECdAdS02vdzdHBdAXEgyLQD1BkwiDQe7RmqajaAXBEa1w0ljfjRq8mQCdInilSofG2JMYgCJ683SKjkIOktRjhxrsijiLSEIrNX2qI6foD5KLaVkrW_A0Nt68vK9zSN3HdgmAgGymVGGChuL5CGhhl_SzliqF6YA3JDEuotmLGSVKMcudBMRayNRHvULKRI25k9blXrg66JihWmwbbK7cwEQXCyoCXhyO7D_emIx2S4wYzFGjrPDXblr0Mdz_oCMaAZ4ewNAmKComl8rMyexFbiu3GhXGkCOROYrKjvfgEds1pBKD3SEd5xfkHtUSEBAsXy8bROEs4aCdRzQie6wQgZakQRHr4OjKotqrkiPnzGwFTHES1pGqCo4aokIQK2DAxz4tRqH0_buQxGw7QxKU3H75j0I0qoFi5cISE3B6BKGWsGjuECEc3sfkLWwg8ByJ3F7cHn5JFRCLV2Cj3mrfNT7zDIJzjnvBf3wt-w3Pvb4XgpkbtP7SA3XMnc0YeCros1uYOHfFms14v5Ml_c7R_KCmpC8qwq6XyeLct1htcFUMB0RlfrvLhjDzjD8wzn6zyfrYrFfUFnNC-yxWIGa0wAJ7MMBGH83rujdHPn740ecrxcrtZ3nJTAjf-XHxhLOKae64b0-eOdfvBXTmXXmGSWcWasuZixzHL_T0Y2X__Tn1vzxzT8Dzf0x_OXNMHb1JUh0DRZZPGOLVlkKVcGTCpAN-DrNC2hIp2BtCTVASRNa85akypNQd91mj_8Ly7Gwjv2D_i_AwAA___EMvr-">