<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/62014">62014</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[X86] Superfluous code generated for <8 x i1> used by sext and select instructions simultaneously
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Nekotekina
</td>
</tr>
</table>
<pre>
Hello, I tried to compile the following IR for X86 and the generated code contains superfluous instructions:
```
define <8 x i16> @vselect_from_packed_bits1(<8 x i16> %x, <8 x i16> %y, i8 %m, ptr %o) {
0:
%z = bitcast i8 %m to <8 x i1>
%1 = sext <8 x i1> %z to <8 x i16>
%2 = select <8 x i1> %z, <8 x i16> %x, <8 x i16> %y
store <8 x i16> %1, ptr %o
ret <8 x i16> %2
}
```
Here the bit vector %z is used twice: once by the sext and once by the select. If either use is removed, the generated code looks normal (properly vectorized).
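For reference, here is a sketch of the two single-use variants (the function names sext_only and select_only are just illustrative, not part of the test); per the above, each of these compiles to properly vectorized code on its own:
```
; Hypothetical single-use variants: each keeps only one user of the
; <8 x i1> mask, which (per the description above) avoids the
; superfluous per-bit expansion.
define <8 x i16> @sext_only(<8 x i16> %x, <8 x i16> %y, i8 %m, ptr %o) {
0:
%z = bitcast i8 %m to <8 x i1>
%1 = sext <8 x i1> %z to <8 x i16>
store <8 x i16> %1, ptr %o
ret <8 x i16> %x
}

define <8 x i16> @select_only(<8 x i16> %x, <8 x i16> %y, i8 %m, ptr %o) {
0:
%z = bitcast i8 %m to <8 x i1>
%1 = select <8 x i1> %z, <8 x i16> %x, <8 x i16> %y
ret <8 x i16> %1
}
```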
Test output (when the function is added to the existing test X86/vselect.ll; a reproduction sketch follows the listing):
```
define <8 x i16> @vselect_from_packed_bits1(<8 x i16> %x, <8 x i16> %y, i8 %m, ptr %o) {
; SSE2-LABEL: vselect_from_packed_bits1:
; SSE2: # %bb.0:
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: shrb $7, %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: shrb $6, %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: shrb $5, %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm4
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: shrb $4, %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: shrb $3, %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: shrb $2, %al
; SSE2-NEXT: movzbl %al, %eax
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm4
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE2-NEXT: movd %edi, %xmm3
; SSE2-NEXT: movl %edi, %eax
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm5
; SSE2-NEXT: shrb %dil
; SSE2-NEXT: movzbl %dil, %eax
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: negl %eax
; SSE2-NEXT: movd %eax, %xmm6
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm3, %xmm2
; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm5
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: movdqa %xmm2, (%rsi)
; SSE2-NEXT: retq
;
; SSE41-LABEL: vselect_from_packed_bits1:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movl %edi, %eax
; SSE41-NEXT: shrb %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: movl %edi, %ecx
; SSE41-NEXT: shrb $2, %cl
; SSE41-NEXT: movzbl %cl, %ecx
; SSE41-NEXT: movl %edi, %edx
; SSE41-NEXT: shrb $3, %dl
; SSE41-NEXT: movzbl %dl, %edx
; SSE41-NEXT: movl %edi, %r8d
; SSE41-NEXT: shrb $4, %r8b
; SSE41-NEXT: movzbl %r8b, %r8d
; SSE41-NEXT: movl %edi, %r9d
; SSE41-NEXT: shrb $5, %r9b
; SSE41-NEXT: movzbl %r9b, %r9d
; SSE41-NEXT: movl %edi, %r10d
; SSE41-NEXT: movl %edi, %r11d
; SSE41-NEXT: movd %edi, %xmm3
; SSE41-NEXT: andl $1, %edi
; SSE41-NEXT: negl %edi
; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: andl $1, %eax
; SSE41-NEXT: negl %eax
; SSE41-NEXT: pinsrw $1, %eax, %xmm0
; SSE41-NEXT: andl $1, %ecx
; SSE41-NEXT: negl %ecx
; SSE41-NEXT: pinsrw $2, %ecx, %xmm0
; SSE41-NEXT: andl $1, %edx
; SSE41-NEXT: negl %edx
; SSE41-NEXT: pinsrw $3, %edx, %xmm0
; SSE41-NEXT: andl $1, %r8d
; SSE41-NEXT: negl %r8d
; SSE41-NEXT: pinsrw $4, %r8d, %xmm0
; SSE41-NEXT: andl $1, %r9d
; SSE41-NEXT: negl %r9d
; SSE41-NEXT: pinsrw $5, %r9d, %xmm0
; SSE41-NEXT: shrb $6, %r10b
; SSE41-NEXT: movzbl %r10b, %eax
; SSE41-NEXT: andl $1, %eax
; SSE41-NEXT: negl %eax
; SSE41-NEXT: pinsrw $6, %eax, %xmm0
; SSE41-NEXT: shrb $7, %r11b
; SSE41-NEXT: movzbl %r11b, %eax
; SSE41-NEXT: negl %eax
; SSE41-NEXT: pinsrw $7, %eax, %xmm0
; SSE41-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE41-NEXT: pand %xmm4, %xmm3
; SSE41-NEXT: pcmpeqw %xmm4, %xmm3
; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm3, (%rsi)
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
0:
%z = bitcast i8 %m to <8 x i1>
%1 = sext <8 x i1> %z to <8 x i16>
%2 = select <8 x i1> %z, <8 x i16> %x, <8 x i16> %y
store <8 x i16> %1, ptr %o
ret <8 x i16> %2
}
```
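For reproduction outside the test harness, llc invocations along these lines should show the SSE2 and SSE4.1 output above (this is a sketch: repro.ll is a placeholder file name for the IR, and the actual RUN lines of X86/vselect.ll may use additional flags or check prefixes):
```
llc < repro.ll -mtriple=x86_64-unknown-unknown -mattr=+sse2
llc < repro.ll -mtriple=x86_64-unknown-unknown -mattr=+sse4.1
```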
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzsWVFvmzoU_jXOi9UI20DIQx7aptUmTXu420PfJsBO4luDqW2adr_-yoQkEAI4ud3VJt1p0wB_5_g7x98xhzjWmq9zxhYguAPBchKXZiPV4it7loY98zyeJJK-Lz4xISTA9_AzNIozCo2EqcwKLhg0GwZXUgi55fkafv4LrqSCT1EI45zCtTRQlwVTK1HKUkPFdCkMILfAWwLvFoRe_be6pWzFcwYBuY_gG-QoBOQBAt971Uyw1PxYKZn9KOL0mdEfCTcaARydgHHwZnl2nr7bpzyyl5m9LIyy1xLgOQSzu9383oEYtIM_ISBLmHCTxtrsjW3oB--APDTwqMJr9mZaiJ2rplnYtsO1nY2xa3k2nL4ga6faSNXJIw5QK_Iaq5jpInG9PrPl2YX6xNRu4RNu4CtLjVS7KLmGpbb62PKUAXJbiaHKiFXDLsQp_LyCcf4O5cr6yKyRzLgxjFp-1u2a5UzFhlGYSsqgkPJZw1yqLBYQ4KhQsmBKvNdT85_Wcj7dcfvOtLGg7YblMKZ0p1br1diRpygE-LFW1FQIgOdHOf6OogTkDn779oBvvtzePXyxOe2f-RBIbWPRuz8AE-s6SaZeB3Xz9eHpew3N5KtNccAoryjjgMVv_XC9UQkE2J_V4FgMuv6ZiBrl4DtnazEKyuQr3YN2Pt-yDH94fOFYfJcGF-fUon30azNB-vFFmRfps9jSSmqzuynAtwATgO9s1c-W0JpXO1PlJ7jzQLAE-L7Kb_POjqHWGGqN4dYYbo2R1lh199FLF_yhS-d_eCb8X1CkH5yMS8vZRcR4L-K2bP2OpFFrDPXI1u9ImrTGhkVcE6YvlxPu1iDq1uBHa4b8odUzsPFdmQn8h1bPwD7iUj3-Xoz-gBj982I8Uy-kU0uk-0IYX-rD0n3oUv8XyxGMKi2g3EliFvb7FFz472QW7GUWtIQVtu6ClrDC1l3QElbYugtaMgs_ZJPuIewPEPZHN-l64pdrZj68LM57L_SmXImty2tnV9v4vvnP9g62k7KN8Gw4BjvRBe_j5jxjtf8SjzarVY7xPa45RwDfI0ua2CehfYRwNByA_VTdby2jTUiRZgV72bobtGcIjgZe3wxxTvMag5w2kmL3Oe7mfp_XmnSFjwAOlOb227h3EsXMy2G0BfPRFd-qPnL6WPXReeJeX-IbBk4vhAZ-vx-fvPE7Hsde-aMU0iH8af-RurFJhZP3M2yoC5u90KkbGyqcvHfZqIg6sPEP6MSJjsW5-D_DZ-7CJzigHfnMEyf_Z_gg71IDNGIw1mG1DTrtBeUD6EN_MYg6S8K7hMRgBfY1OW1UwXOttqder6MzWIIHOv2oIxfccHkdl_4CPC6OW15Iw-VVXIaLb09nGHWk4ze8XkdnsPYOdAZRRzpBw6sTndPfGhXyHHcPC3QR_q8tk_DSMjn98Vgh5Bowcgv40hBml4Yw3lWTq7vqZv2P9dQDs_R7dWmq_eub6pNMHXte3-3N0u6sXY0SwXL6mpxrCw-XyCEjzXa-px3u60WRm3aODfT_546j544TuiB0TubxhC1QGHnzwPMImWwWFCUJIx5ZJdRDCUs8imk6T-yTMEh9b8IX2MPE870IeYQEZBoHEaIrOlvNvVWazBDwPZbFXEyFeM2mUq0nXOuSLULsIX8i4oQJXZ2PY5yzLawGAcYgWE7UwtrcJOVaA98TXBt99GK4EdXB-lMUgmAJvzXOwatjzeMp58p-sDUXoDpETd5Pj04hz7VRZWq4zKHmWSlMnDNZavE-KZVYbIwptBUTfgT4cc3NpkymqcwAfrS06v9uCiX_ZqkB-LEKRgP8WAX7TwAAAP__zGU3cA">