<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/89533">89533</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Inverted movemasks result in redundant logic
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          Validark
      </td>
    </tr>
</table>

<pre>
    I wrote this `tokenize` function (https://zig.godbolt.org/z/oYosTb1zK):

```zig
export fn tokenize(source: [*]const u8) extern struct { start: [*]const u8, end: [*]const u8 } {
    var cur = source[0..];
    const start = cur;

    while (true) {
        const V = @Vector(@bitSizeOf(usize), u8);
        const vec: V = cur[0..@sizeOf(V)].*;

       const identifier_bitstring = ~(@as(usize, @bitCast(vec == @as(V, @splat('_')))));

        cur = cur[@ctz(identifier_bitstring)..];
        if (identifier_bitstring != 0) break;
    }

    // our token span is start..end
    const end = cur;
    return .{ .start = start, .end = end };
}
```

Next I made the following change:
```diff
-       const identifier_bitstring = ~(@as(usize, @bitCast(vec == @as(V, @splat('_')))));
+       const identifier_bitstring =  (@as(usize, @bitCast(vec != @as(V, @splat('_')))));
```

Unfortunately, although the two versions are logically equivalent, they compile to different assembly.

First version (Zen 4): 

```asm
.LCPI0_1:
 .byte   95
tokenize1:
        vpbroadcastb    zmm0, byte ptr [rip + .LCPI0_1]
        mov     rax, rdi
        mov     rdx, rdi
.LBB0_1:
        vmovdqu64       zmm1, zmmword ptr [rdx]
 mov     rcx, rdx
        vpcmpneqb       k1, zmm1, zmm0
 vpcmpeqb        k0, zmm1, zmm0 ; do the same work, but this time not inverted, so we can use jb rather than je?
        kmovq   rdx, k1
 tzcnt   rdx, rdx
        add     rdx, rcx
        kortestq k0, k0
        jb      .LBB0_1
        vzeroupper
 ret
```

Second version (Zen 4):

```asm
.LCPI1_1:
 .byte   95
tokenize2:
        vpbroadcastb    zmm0, byte ptr [rip + .LCPI1_1]
        mov     rax, rdi
        mov     rdx, rdi
.LBB1_1:
        vpcmpneqb       k0, zmm0, zmmword ptr [rdx]
 mov     rcx, rdx
        kmovq   rdx, k0
        tzcnt   rdx, rdx
        add     rdx, rcx
        kortestq        k0, k0
 je      .LBB1_1
        vzeroupper
        ret
```

First version (Zen 3):

```asm
.LCPI0_1:
        .byte 95
tokenize1:
        vpbroadcastb    ymm0, byte ptr [rip + .LCPI0_1]
        mov     rax, rdi
        mov     rdx, rdi
.LBB0_1:
        mov     rcx, rdx
        vpcmpeqb        ymm2, ymm0, ymmword ptr [rcx + 32]
        vpcmpeqb        ymm1, ymm0, ymmword ptr [rdx]
        vpmovmskb       esi, ymm2
        vpmovmskb edx, ymm1
        shl     rsi, 32
        or      rsi, rdx
 mov     rdx, rsi ; preserve non-inverted rsi so we can cmp against -1 later??
        not     rdx
        tzcnt   rdx, rdx
        add rdx, rcx
        cmp     rsi, -1
        je      .LBB0_1
 vzeroupper
        ret
```

Second version (Zen 3):

```asm
.LCPI1_1:
        .byte   95
tokenize2:
 vpbroadcastb    ymm0, byte ptr [rip + .LCPI1_1]
        mov     rax, rdi
        mov     rdx, rdi
.LBB1_1:
        mov     rcx, rdx
 vpcmpeqb        ymm2, ymm0, ymmword ptr [rcx + 32]
        vpcmpeqb ymm1, ymm0, ymmword ptr [rdx]
        vpmovmskb       esi, ymm2
 vpmovmskb       edx, ymm1
        not     esi
        not     edx ; do 2 not's before combining these bitstrings instead of just doing 1??
 shl     rsi, 32
        or      rsi, rdx
        tzcnt   rdx, rsi
        add     rdx, rcx
        test    rsi, rsi ; use inverted value instead of preserving the non-inverted value and doing cmp -1??
 je      .LBB1_1
        vzeroupper
 ret
```

https://zig.godbolt.org/z/oYosTb1zK
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzMWFtv4zYT_TX0yyCCRMm3Bz_EMQwE3-JrgW0DtC8LShzZjCXSISnH8UN_e0FdLcXOJnspVggimDOcOTM8cySbGSM2EnFBxksyXo1YYbdKLx5YJjjTu1Gs-MviHp61sgh2KwyQiW_VDqU4IZn4kBYysUJJEt4CobOttXtDwltC14SuT2LjbRSPVWY9pTduhdC1-kuZP-Lg9D9C58RfEf-2_j_xq7-T2FQreNwrbSGV0KakM6MKnWCZb7wk9JaMV4mSxkIxI3QOeLSoJRiri8QCmS7BWKbtFf87QMkv2oBMV257hQQA4MA0JIUGEq6gBjFe-p5HxisSnvlVEcqspW9S6NbeeT1vRYauZVYX6ID3cnVxHsoYJPIfMLFKEzojkR8L-1mc8LeU0Flhyr7MXTFlC3pgukAHTFyhDy2mCnzkmybSg9s8XnmuEa8Bt4EER2lFKlB_iYU1Vgu5KYP-U4FjpkN1BxXaO2YsobMDJs6zrqh0fKidzD5jzoXQ6RdCp2VBZ38X8bTHUZVDIj-xJ0JnlxASOn99VO4SKVzZAoQGLrrvjifWyHa9zWS6GmKqaA-q0BVjweyZBGEqNnieI9uAJyj5K5Y4q0ZbaAmeY7DXkakiM70Dr9lY3qddXR2sZp7OUf4fjxbuIWfczTNCqrJMPbtiky2TGzdXg91cpGm1dPML0YAu3wcG3gemOuhvBXOp0X_KVGlbSGYxe3HxSvXUaIrMGhASXF9Ro7SAubDe-d610OXAaiOUdBX8jRKiMuUtXJRMZvJqxft09_u9_yVozxG8-MUiAMzH1UIjpWcu9XXYx1oxnjBjY_f5lOe-Q14G2FvtRFKLPbjmt3nGq36QXB3Ku2ZHt1dzccXO-3bv03LZw92AytWBPxWTqP58yvPAbTzl-bPSvMXFjx2UNkdS5zgOC03yvcSnuP68ayI29_okK8fOD3b-0BFIuASuylkyLEd4VnpXNq2w1ZFbkSNIZUHIA2qL3FmNgmeEhEkoDMJjDJrZLWqwWybhEUm47iPe5erw1HVtF9Rme0qkPe_moFLGea_bycC-U9qisU91ZTu_b36sK28Op9_FE2pV7Peo63WN9o2B-IyJkvwKq98mteNa8B5O0-_ndPCDOR1c4vSQf37Lu-9k9pAng_P8YXTpAW-zPGJHl-CrdKmvt1lzUQrD95DmtRLWV0Wej8vhy68gh--StjPFeslz6hwb8C8DciXHEn9IXyG_ECl4K9I5TdsQuTrkZtfEQCPqnfSaI1Z9KJP1XMw2qwqvYoSDCErDubnry7DBRpSSvddoUB-cMsubRplLayfNSb4HtmHCvWDcBJAxi9op81Ccnbg3KT48btdGzSU_K-hm0I7zUeuU-dtm7LIyv2vIXilzb8beFOgPj9d_ocxXx-snzNVPGahXHtfmqSGtC3HZwI_Nyw11i4RODcSYKo2QqDwW0r1j2y0ahPad273ZGouMg0rhsTAWuHJuQW9uvnWWr4zVsIKvPcXcI-w8fi0J7k2sVYIDywo8L6YWjLrmvmxUzkzyulo3uzf9kj_2aHx7Xj_0I8uIL0I-D-dshItgGoSTGQ3G89F2EXE_jafzMKD-NIqDaBbHNMLUn_izKIjC-UgsqE8jP6KBP4nmwdxjjKd0yudRzDDlE0oiH3MmMi_LDrnLPRLGFLiYzcdhOMpYjJkpf1iiVOIzlEZC3USM9MLtuYmLjSGRnwljTRfFCpvh4r7pbq4OmDOza749uS9PGnkhOZMWMrURyajQ2aLflo2w2yL2EpUTunah69vNXqtHTCyh6xKQIXRdAv43AAD__66nP4U">