<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/97720">97720</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [X86] Failure to match PMADDUBSW horizontal adds
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            backend:X86
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          RKSimon
      </td>
    </tr>
</table>

<pre>
    We correctly match this as PMADDWD:
```c
/*
 * Multiplies the 16-bit lanes of x and y after widening them to __v4si,
 * then adds the product of each even lane to the product of the odd lane
 * that follows it, returning the four sums.
 */
__v4si _mm_mul_hadd_epi16(__v8hi x, __v8hi y) {
    /* Even-indexed lanes (0,2,4,6) of each input, converted to __v4si. */
    __v4si xl = __builtin_convertvector(__builtin_shufflevector(x,x,0,2,4,6), __v4si);
    __v4si yl = __builtin_convertvector(__builtin_shufflevector(y,y,0,2,4,6), __v4si);
    /* Odd-indexed lanes (1,3,5,7) of each input, converted to __v4si. */
    __v4si xh = __builtin_convertvector(__builtin_shufflevector(x,x,1,3,5,7), __v4si);
    __v4si yh = __builtin_convertvector(__builtin_shufflevector(y,y,1,3,5,7), __v4si);
    /* Per-pair sum of the even-lane product and the odd-lane product. */
    return (xl * yl) + (xh*yh);
}
```
```asm
_mm_mul_hadd_epi16(short vector[8], short vector[8]):          # @_mm_mul_hadd_epi16(short vector[8], short vector[8])
        vpmaddwd        %xmm0, %xmm1, %xmm0
 retq
```
But fail to do the same for PMADDUBSW:
```c
__v8hi _mm_mul_hadd_epi8(__v16qu x, __v16qi y) {
    __v8hu xl = __builtin_convertvector(__builtin_shufflevector(x,x,0,2,4,6,8,10,12,14), __v8hu);
    __v8hi yl = __builtin_convertvector(__builtin_shufflevector(y,y,0,2,4,6,8,10,12,14), __v8hi);
    __v8hu xh = __builtin_convertvector(__builtin_shufflevector(x,x,1,3,5,7,9,11,13,15), __v8hu);
    __v8hu yh = __builtin_convertvector(__builtin_shufflevector(y,y,1,3,5,7,9,11,13,15), __v8hi);
    return (xl * yl) + (xh*yh);
}
```
```asm
_mm_mul_hadd_epi8(unsigned char vector[16], char vector[16]):      # @_mm_mul_hadd_epi8(unsigned char vector[16], char vector[16])
        vpmovwb %xmm1, %xmm3
        vpshufb .LCPI0_1(%rip), %xmm1, %xmm1    # xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
        vpandd .LCPI0_2(%rip){1to4}, %xmm0, %xmm2
        vpsrlw  $8, %xmm0, %xmm0
 vpmovsxbw       %xmm3, %xmm3
        vpmovsxbw       %xmm1, %xmm1
 vpmullw %xmm2, %xmm3, %xmm2
        vpmullw %xmm0, %xmm1, %xmm0
 vpaddw  %xmm0, %xmm2, %xmm0
        retq
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzEVktv4zYQ_jX0hdiAHL0POtgSDBRtgUUXRXoTKIkO2VKWw4cc99cXlOXHOoqz2M2iggWPhuTMN8P5hmTGyKct5zmKVigqF8xZ0ev8j1-_yK7fLuq-PeSPHDe91ryx6oA7ZhuBrZAGM4M__74sy8cSBUtESkSWKCbHXzN9J-VRqKohNBJXXVd1TlWCtW3Fd5LGCNKqGlIh8QuCAk_yAUGGUbI6LsYY48nAi8IoKHFV1U4qK7dV028Hru3AG9vr0dhpxAi32Sh-HvH2_UsQFICgCBEUMYJschsa6T-Ck8_J4eH7HR4QFIdvd3gVpPjhICmCIkBQRAiK5P0gv9_hKchvdIgx1tw6vcUerMIIlvigxt2G1agTCJYHcb3oXETn6rr5ZKabqmyuvIzotcUT4GiVoqj04GbVGQpGjMcHQYBRSD7A6jl4_wy7jrXtvr24iV66zlfJJNKLOIXqk_Y8m4SVs3jDpMK2x22PreDYsI7jTa-P7Pxz9eXxTX5OdLuNMD2yksbP7kxLGj-_wctUuJ_AyyL1VeWV1GtpeCmqVLgZ5ox948PZehfGHIHHbHw4gQvvlHol9VoavZ8N9xNofRfG_8l0X7FuO55lLW4E0xcO0nji5qz6wvg32P4Dlm9Z3w_7-jXFg9t5fi9q_PBb8fkXUlEEKYJIy92U6lfr6Qn8KPsNH8ej1ft7V7i7b1TeQmPbtj0hg6-QJStq-9Bv4FXvOovwKkat9h51mM7OP7W9MWnmpd5_1SqDO8mbm3-drIthp9T-DO_K4Nugr5fcb9fDznf4mdYOr-dOz0yHX7R50GZBxhY8pwkQkiUBgYXI45CzmsbZJiaQbdImiaGhAeObqK43JIsXMgcCIUlISCFMAR4ikiWbjAYJqUldxy0KCe-YVA9KDd1Dr58W0hjH8yxJgCwUq7ky450QoGbNP3zbomD5VxojAH9N1Llf96l2TwaFREljzcWSlVaNF0o_PyrxmknlNPcn1PH2eD6XsOi1_LffWqYwa1uzcFrlwtqd8ScWrBGsn6QVrn5o-g7B2ruY_j7tdP83byyC9YjcIFgfwQ85_BcAAP__VzYb-Q">