<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/55583">55583</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Improve vectorized code of loop with bool condition
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            loopoptim,
            vectorization
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          davidbolvansky
      </td>
    </tr>
</table>

<pre>
```
#define N 256
typedef char T;
extern T a[N];
extern T b[N];
extern T c[N];
extern _Bool pb[N];
extern char pc[N];

void predicate_by_bool()
{
  for (int i = 0; i < N; i++)
    c[i] = pb[i] ? a[i] : b[i];
}

void predicate_by_char()
{
  for (int i = 0; i < N; i++)
    c[i] = pc[i] ? a[i] : b[i];
}
```
LLVM -O3 -mavx2:
```
predicate_by_bool:                      # @predicate_by_bool
        xor     eax, eax
        mov     r8, qword ptr [rip + pb@GOTPCREL]
        vpxor   xmm0, xmm0, xmm0
        mov     rdx, qword ptr [rip + b@GOTPCREL]
        mov     rsi, qword ptr [rip + a@GOTPCREL]
        mov     r9, qword ptr [rip + c@GOTPCREL]
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        vpcmpeqb        ymm1, ymm0, ymmword ptr [r8 + rax]
        vpmovmskb       ecx, ymm1
        test    cl, 1
        mov     rdi, rsi
        cmovne  rdi, rdx
        test    cl, 2
        vmovd   xmm1, dword ptr [rdi + rax]     # xmm1 = mem[0],zero,zero,zero
        mov     rdi, rsi
        cmovne  rdi, rdx
        vpinsrb xmm1, xmm1, byte ptr [rdi + rax + 1], 1
        test    cl, 4
        mov     rdi, rsi
        cmovne  rdi, rdx
        test    cl, 8
        vpinsrb xmm1, xmm1, byte ptr [rdi + rax + 2], 2
        mov     rdi, rsi
        cmovne  rdi, rdx
        vpinsrb xmm1, xmm1, byte ptr [rdi + rax + 3], 3
        test    cl, 16
        mov     rdi, rsi
        cmovne  rdi, rdx
        test    cl, 32
        vpinsrb xmm1, xmm1, byte ptr [rdi + rax + 4], 4
        mov     rdi, rsi
        cmovne  rdi, rdx
        ....

```
ICC -O3 -mavx2 (GCC is similar):
```
predicate_by_bool:
        vpxor     ymm0, ymm0, ymm0                              #12.27
        vmovdqu   ymm1, YMMWORD PTR a[rip]                      #12.27
        vmovdqu   ymm4, YMMWORD PTR 32+a[rip]                   #12.27
        vmovdqu   ymm7, YMMWORD PTR 64+a[rip]                   #12.27
        vmovdqu   ymm10, YMMWORD PTR 96+a[rip]                  #12.27
        vmovdqu   ymm13, YMMWORD PTR 128+a[rip]                 #12.27
        vpcmpeqb  ymm2, ymm0, YMMWORD PTR pb[rip]               #12.12
        vpcmpeqb  ymm5, ymm0, YMMWORD PTR 32+pb[rip]            #12.12
        vpcmpeqb  ymm8, ymm0, YMMWORD PTR 64+pb[rip]            #12.12
        vpblendvb ymm3, ymm1, YMMWORD PTR b[rip], ymm2          #12.27
        vpblendvb ymm6, ymm4, YMMWORD PTR 32+b[rip], ymm5       #12.27
        vpblendvb ymm9, ymm7, YMMWORD PTR 64+b[rip], ymm8       #12.27
        vmovdqu   ymm1, YMMWORD PTR 160+a[rip]                  #12.27
        vmovdqu   ymm4, YMMWORD PTR 192+a[rip]                  #12.27
        vmovdqu   YMMWORD PTR c[rip], ymm3                      #12.5
        vmovdqu   YMMWORD PTR 32+c[rip], ymm6                   #12.5
        vmovdqu   YMMWORD PTR 64+c[rip], ymm9                   #12.5
        vpcmpeqb  ymm11, ymm0, YMMWORD PTR 96+pb[rip]           #12.12
        vpcmpeqb  ymm14, ymm0, YMMWORD PTR 128+pb[rip]          #12.12
        vpcmpeqb  ymm2, ymm0, YMMWORD PTR 160+pb[rip]           #12.12
        vpcmpeqb  ymm5, ymm0, YMMWORD PTR 192+pb[rip]           #12.12
        vpcmpeqb  ymm7, ymm0, YMMWORD PTR 224+pb[rip]           #12.12
        vmovdqu   ymm0, YMMWORD PTR 224+a[rip]                  #12.27
        vpblendvb ymm12, ymm10, YMMWORD PTR 96+b[rip], ymm11    #12.27
        vpblendvb ymm15, ymm13, YMMWORD PTR 128+b[rip], ymm14   #12.27
        vpblendvb ymm3, ymm1, YMMWORD PTR 160+b[rip], ymm2      #12.27
        vpblendvb ymm6, ymm4, YMMWORD PTR 192+b[rip], ymm5      #12.27
        vpblendvb ymm8, ymm0, YMMWORD PTR 224+b[rip], ymm7      #12.27
        vmovdqu   YMMWORD PTR 96+c[rip], ymm12                  #12.5
        vmovdqu   YMMWORD PTR 128+c[rip], ymm15                 #12.5
        vmovdqu   YMMWORD PTR 160+c[rip], ymm3                  #12.5
        vmovdqu   YMMWORD PTR 192+c[rip], ymm6                  #12.5
        vmovdqu   YMMWORD PTR 224+c[rip], ymm8                  #12.5
        vzeroupper                                              #13.1
        ret  
```

Current codegen: https://godbolt.org/z/hPf9fEs8v
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzFmNtzojoYwP8afMnUgSAIDz6s2t3Tmd6m09kz56nDJWp2uTVEVvvXny8BFYFYqu0sowZIvl--W76AfhpuJ5qta9gMyYImBN0jbNmaPtf0b3ybEbiLgpXH0LNmTsvbZMMJS9Az8jRreq9Z83aPr-wJFD0v0zSNUKYSlCpkLeHyt0hpiDJGQhp4nLz42xcfYBp2NOxW48aVAEKLlCHooglHFGnmHOlAk6czdC9PNTyVH3cngqTWFCaWAlLJ6uq7dEJ18Q3teg4KjuenNRWGfZ2mwTmaQjbIk9vbn3fo6sFEV7FXbDBIHQaUH3nZdrwpNWkfkGRIG-ltgb324tiA2eIg3kbDM9kc9cdpIVvmiO7XPykDl3JwlTVlNINJpiJCI_3Hw_Pj7On6Vhh5BCiycopNHOsCcdR2TxVulHOdnGoPyKkS4PUCuEr5oEt-eDud6i-GMhSNoJiQB9fPK5qjmyQhDN2maYb-IV5ImEDMScZXMMhoOjKIM_Lq7663cWwINbeVQ6E9UtiR-jIIaDskYGic_96hSLCpAI0pOcm5aINI9BuqcElvC6cf9QcwACrcvj_cnILjhoogHJZZI40Mj0wLac22vV_FWLkQYxLDKF3YjWdvhKWN5lPNKDKa5Mzfa7pr_S0nHQrL1ihVa3r02COjr3S3c7kRuDIC_31_mpUq5sn0tb_SoWYzgc8wY1SZ8cmRH8JR3xYbO8rNbFbuO14M-47YA3_AHShOOY1p5DEQhi3vA9tRd_1H9UK1b9HJA1a1gYd43FEbXtfoUAL_u7v79-Fpjh6fn-S-C8V6VxjOYY6aTAgunp4E96COm1R79AlUQ29iXfsdbB-q2aQa4qnpFFZB3W9aQMX16Nfh8hmvk1tSjdbiqlEtFVVGTYHuwXVUXBm3D3L9iCRh4Queud9rG9wDsRqB3_dujWtXUt3Z24Jb_cluJdKdwS2y84E8a6WZeDm6NHtbHjDc9xbwe9Q6LWgabLZ5B6bVBylD1OLal0FldFpQtze0vhgMQ7UaZLnpXg09FpkxUnHLgtMJ7sFVlpoywc7WV1lsyhQ7mztWcTFWVxsFt74UFLwzlkK9IBg79yp2n1ZJMIy-5J2DVTtQGz3qR1aW3TIjVMX3ksJb5oSq8vYgK7egMoot8vgkubNIyHi1ioRR23qO86JX5SlD1aZaF0FlnPoU349A3b6V9wPQMjotqNMXKl5S11lGWFvg1CFg5rDxTskIvKR0PrqXv7M1YyThKEhDsiSJ-P9hxXmWi-d4_B0-yzT004gPU7aEqzf4rh4X7uI6d4pBODFD13S9Aac8IpObOGNpQVBBAp4y-kZCiUXpAkXiL44_lK-QeEmA20lIOU2TwZpFk8aEMGrtD4M0hosoKnbNFcB_ARkuaZ6vSQ4nlmU55mA18RxL1_XQNsfeQncDkxBfdxZ2sHAINvXAGkSeT6J8AiHRMBbKpBmnQJaFDJqdyp5QSty25gM6wTrGumW4hqGLOC3ssR9iA6zXPRJatjbSSezRaCjUEw4asInU1F8vc-iMaM7zQ6eX53SZECK1AL635quUTUKvoMLFhZfkv7cDadtEGvY_Aiq1AA">