<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/133568">133568</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [AVX512] Avoid Memory form of Compress in AMD znver4
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          venkataramananhashkumar
      </td>
    </tr>
</table>

<pre>
    For the given LLVM IR code, X86 generates the memory form of compress.
ref: https://godbolt.org/z/KhhczdbY8

.LBB0_4: # %vector.body
        vptestmd        k1, ymm1, ymm0
        movsxd  r8, r8d
        vpaddd  ymm1, ymm1, ymm2
 vmovupd zmm3 {k1} {z}, zmmword ptr [rsi + r11]
        kmovb   ebx, k1
 popcnt  ebx, ebx
        kortestb        k1, k1
        cmove   ebx, r10d
        add     r11, 64
        vcompresspd     zmmword ptr [rdi + 8*r8] {k1}, zmm3
        add     r8d, ebx
        cmp     r9, r11
 jne     .LBB0_4

The memory form is microcoded and slower. We need to generate the sequence shown below instead.

        kmovb   %k1, %r11d
 pextl   %r11d, %r11d, %ebx
        vcompresspd     %zmm3, %zmm3 {%k1} {z}
 kmovd   %ebx, %k1
        vmovupd %zmm3, (%rdi,%rcx,8) {%k1}

</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJxsVE2TozgM_TXiopqUMeHrwIF0KlVb27nsYXb2tGWwCEwwZm2HdOfXbxmSHtLTXOxYetJ7kiJhbXcaiAqIdxDvA3FxrTbFRMNZOGGEEoMYWmHb80UJE1RavheNNuhawlM30YCvr9-P-MdfWGtJ-CNL8EQDGeHIoiKlzTs22ijUDdZajYas3SCw0lADUYmtc6OFqAR-AH44aVnp3m20OQE_3IAf_mzb-iarfzJgJbBy87rbsX-3Hgk8QuDxRLXTZuOJASvx_k2jI-uUfPw-h8Bf8F2px8lWzkpP9k0imswbTSafAgkpJa6hj5N7t0np6TJKvCkVIaS7cwjp3l9ukO69402pqzYSR2cQ4p2xHQLfoQlDiPerRGelpwoRqXrzsHPobaMe68F9PPpjhdDGi6yeNS7A-1crPdGvoCZka21CLvXxXPgLJtu17kezxsXnswy5yMiAlyaDeP-h_a45-ipPJn9XUatxMeYLw5n-z4Hmx0e7594fV9PUWVRdbfQ8dRLFINH2-kpmg_g34UAk0emPUURL_11oqAmFRdvqK1bU6-s8iF_0AHi8FBN4bMJwrtlIb65fbPPTyrpcn2V9rh_weK7K4vuYliXRamBYOZOQC-LetsVrFfs-c08xM89GdsBf_KX2wAx4vs4CrAxkEck8ykVARZhuI87TNM6DtmAJk0lESVSHSUNpFiW1zPKoSVjYbFlSBV3BGY9ZxHMWx3GcbyqqJFUNz5oqrdM0hy0jJbp-0_eT8v_goLP2QkUYRXGSBb2oqLfzmuF8oCvOVuDcbx1TeNC36nKysGV9Z539FcZ1rp_3U_n9Rxx6fywn3Uk8Pq-Xl3vFsRuwPO7xNkxktsHF9MWnLdO59lJtaq2AH3ya-_FtNPon1Q74YSZngR_u7KeC_x8AAP__YdJ-Ug">