<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/55583>55583</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Improve vectorized code of loop with bool condition
</td>
</tr>
<tr>
<th>Labels</th>
<td>
loopoptim,
vectorization
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
davidbolvansky
</td>
</tr>
</table>
<pre>
```
#define N 256
typedef char T;
extern T a[N];
extern T b[N];
extern T c[N];
extern _Bool pb[N];
extern char pc[N];
void predicate_by_bool()
{
for (int i = 0; i < N; i++)
c[i] = pb[i] ? a[i] : b[i];
}
void predicate_by_char()
{
for (int i = 0; i < N; i++)
c[i] = pc[i] ? a[i] : b[i];
}
```
LLVM -O3 -mavx2:
```
predicate_by_bool: # @predicate_by_bool
xor eax, eax
mov r8, qword ptr [rip + pb@GOTPCREL]
vpxor xmm0, xmm0, xmm0
mov rdx, qword ptr [rip + b@GOTPCREL]
mov rsi, qword ptr [rip + a@GOTPCREL]
mov r9, qword ptr [rip + c@GOTPCREL]
.LBB0_1: # =>This Inner Loop Header: Depth=1
vpcmpeqb ymm1, ymm0, ymmword ptr [r8 + rax]
vpmovmskb ecx, ymm1
test cl, 1
mov rdi, rsi
cmovne rdi, rdx
test cl, 2
vmovd xmm1, dword ptr [rdi + rax] # xmm1 = mem[0],zero,zero,zero
mov rdi, rsi
cmovne rdi, rdx
vpinsrb xmm1, xmm1, byte ptr [rdi + rax + 1], 1
test cl, 4
mov rdi, rsi
cmovne rdi, rdx
test cl, 8
vpinsrb xmm1, xmm1, byte ptr [rdi + rax + 2], 2
mov rdi, rsi
cmovne rdi, rdx
vpinsrb xmm1, xmm1, byte ptr [rdi + rax + 3], 3
test cl, 16
mov rdi, rsi
cmovne rdi, rdx
test cl, 32
vpinsrb xmm1, xmm1, byte ptr [rdi + rax + 4], 4
mov rdi, rsi
cmovne rdi, rdx
....
```
ICC -O3 -mavx2 (GCC is similar):
```
predicate_by_bool:
vpxor ymm0, ymm0, ymm0 #12.27
vmovdqu ymm1, YMMWORD PTR a[rip] #12.27
vmovdqu ymm4, YMMWORD PTR 32+a[rip] #12.27
vmovdqu ymm7, YMMWORD PTR 64+a[rip] #12.27
vmovdqu ymm10, YMMWORD PTR 96+a[rip] #12.27
vmovdqu ymm13, YMMWORD PTR 128+a[rip] #12.27
vpcmpeqb ymm2, ymm0, YMMWORD PTR pb[rip] #12.12
vpcmpeqb ymm5, ymm0, YMMWORD PTR 32+pb[rip] #12.12
vpcmpeqb ymm8, ymm0, YMMWORD PTR 64+pb[rip] #12.12
vpblendvb ymm3, ymm1, YMMWORD PTR b[rip], ymm2 #12.27
vpblendvb ymm6, ymm4, YMMWORD PTR 32+b[rip], ymm5 #12.27
vpblendvb ymm9, ymm7, YMMWORD PTR 64+b[rip], ymm8 #12.27
vmovdqu ymm1, YMMWORD PTR 160+a[rip] #12.27
vmovdqu ymm4, YMMWORD PTR 192+a[rip] #12.27
vmovdqu YMMWORD PTR c[rip], ymm3 #12.5
vmovdqu YMMWORD PTR 32+c[rip], ymm6 #12.5
vmovdqu YMMWORD PTR 64+c[rip], ymm9 #12.5
vpcmpeqb ymm11, ymm0, YMMWORD PTR 96+pb[rip] #12.12
vpcmpeqb ymm14, ymm0, YMMWORD PTR 128+pb[rip] #12.12
vpcmpeqb ymm2, ymm0, YMMWORD PTR 160+pb[rip] #12.12
vpcmpeqb ymm5, ymm0, YMMWORD PTR 192+pb[rip] #12.12
vpcmpeqb ymm7, ymm0, YMMWORD PTR 224+pb[rip] #12.12
vmovdqu ymm0, YMMWORD PTR 224+a[rip] #12.27
vpblendvb ymm12, ymm10, YMMWORD PTR 96+b[rip], ymm11 #12.27
vpblendvb ymm15, ymm13, YMMWORD PTR 128+b[rip], ymm14 #12.27
vpblendvb ymm3, ymm1, YMMWORD PTR 160+b[rip], ymm2 #12.27
vpblendvb ymm6, ymm4, YMMWORD PTR 192+b[rip], ymm5 #12.27
vpblendvb ymm8, ymm0, YMMWORD PTR 224+b[rip], ymm7 #12.27
vmovdqu YMMWORD PTR 96+c[rip], ymm12 #12.5
vmovdqu YMMWORD PTR 128+c[rip], ymm15 #12.5
vmovdqu YMMWORD PTR 160+c[rip], ymm3 #12.5
vmovdqu YMMWORD PTR 192+c[rip], ymm6 #12.5
vmovdqu YMMWORD PTR 224+c[rip], ymm8 #12.5
vzeroupper #13.1
ret
```
Current codegen: https://godbolt.org/z/hPf9fEs8v
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzFmNtzojoYwP8afMnUgSAIDz6s2t3Tmd6m09kz56nDJWp2uTVEVvvXny8BFYFYqu0sowZIvl--W76AfhpuJ5qta9gMyYImBN0jbNmaPtf0b3ybEbiLgpXH0LNmTsvbZMMJS9Az8jRreq9Z83aPr-wJFD0v0zSNUKYSlCpkLeHyt0hpiDJGQhp4nLz42xcfYBp2NOxW48aVAEKLlCHooglHFGnmHOlAk6czdC9PNTyVH3cngqTWFCaWAlLJ6uq7dEJ18Q3teg4KjuenNRWGfZ2mwTmaQjbIk9vbn3fo6sFEV7FXbDBIHQaUH3nZdrwpNWkfkGRIG-ltgb324tiA2eIg3kbDM9kc9cdpIVvmiO7XPykDl3JwlTVlNINJpiJCI_3Hw_Pj7On6Vhh5BCiycopNHOsCcdR2TxVulHOdnGoPyKkS4PUCuEr5oEt-eDud6i-GMhSNoJiQB9fPK5qjmyQhDN2maYb-IV5ImEDMScZXMMhoOjKIM_Lq7663cWwINbeVQ6E9UtiR-jIIaDskYGic_96hSLCpAI0pOcm5aINI9BuqcElvC6cf9QcwACrcvj_cnILjhoogHJZZI40Mj0wLac22vV_FWLkQYxLDKF3YjWdvhKWN5lPNKDKa5Mzfa7pr_S0nHQrL1ihVa3r02COjr3S3c7kRuDIC_31_mpUq5sn0tb_SoWYzgc8wY1SZ8cmRH8JR3xYbO8rNbFbuO14M-47YA3_AHShOOY1p5DEQhi3vA9tRd_1H9UK1b9HJA1a1gYd43FEbXtfoUAL_u7v79-Fpjh6fn-S-C8V6VxjOYY6aTAgunp4E96COm1R79AlUQ29iXfsdbB-q2aQa4qnpFFZB3W9aQMX16Nfh8hmvk1tSjdbiqlEtFVVGTYHuwXVUXBm3D3L9iCRh4Queud9rG9wDsRqB3_dujWtXUt3Z24Jb_cluJdKdwS2y84E8a6WZeDm6NHtbHjDc9xbwe9Q6LWgabLZ5B6bVBylD1OLal0FldFpQtze0vhgMQ7UaZLnpXg09FpkxUnHLgtMJ7sFVlpoywc7WV1lsyhQ7mztWcTFWVxsFt74UFLwzlkK9IBg79yp2n1ZJMIy-5J2DVTtQGz3qR1aW3TIjVMX3ksJb5oSq8vYgK7egMoot8vgkubNIyHi1ioRR23qO86JX5SlD1aZaF0FlnPoU349A3b6V9wPQMjotqNMXKl5S11lGWFvg1CFg5rDxTskIvKR0PrqXv7M1YyThKEhDsiSJ-P9hxXmWi-d4_B0-yzT004gPU7aEqzf4rh4X7uI6d4pBODFD13S9Aac8IpObOGNpQVBBAp4y-kZCiUXpAkXiL44_lK-QeEmA20lIOU2TwZpFk8aEMGrtD4M0hosoKnbNFcB_ARkuaZ6vSQ4nlmU55mA18RxL1_XQNsfeQncDkxBfdxZ2sHAINvXAGkSeT6J8AiHRMBbKpBmnQJaFDJqdyp5QSty25gM6wTrGumW4hqGLOC3ssR9iA6zXPRJatjbSSezRaCjUEw4asInU1F8vc-iMaM7zQ6eX53SZECK1AL635quUTUKvoMLFhZfkv7cDadtEGvY_Aiq1AA">