<!--
  Issue metadata table: one row per field, with the field name in a row
  header cell (<th scope="row">) and its value in the adjacent <td>.
  The presentational attributes on <table> are kept as-is because no
  stylesheet is visible in this fragment to take over their job.
-->
<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th scope="row">Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/56684">56684</a>
        </td>
    </tr>

    <tr>
        <th scope="row">Summary</th>
        <td>
            [X86] Failure to reassociate PMULUDQ mul-by-constant pairs of nodes
        </td>
    </tr>

    <tr>
        <th scope="row">Labels</th>
        <td>
            backend:X86,
            missed-optimization
        </td>
    </tr>

    <!-- The Assignees value cell is intentionally empty in the original. -->
    <tr>
        <th scope="row">Assignees</th>
        <td>
        </td>
    </tr>

    <tr>
        <th scope="row">Reporter</th>
        <td>
            RKSimon
        </td>
    </tr>
</table>

<pre>
    Noticed while working on https://reviews.llvm.org/D129765
```
define &lt;4 x i32&gt; @test_urem_even_allones_and_poweroftwo_and_one(&lt;4 x i32&gt; %X) nounwind {
  %urem = urem &lt;4 x i32&gt; %X, &lt;i32 14, i32 4294967295, i32 16, i32 1&gt;
  %cmp = icmp eq &lt;4 x i32&gt; %urem, &lt;i32 0, i32 0, i32 0, i32 0&gt;
  %ret = zext &lt;4 x i1&gt; %cmp to &lt;4 x i32&gt;
  ret &lt;4 x i32&gt; %ret
}
```
llc -mtriple=x86_64--
```
.LCPI0_0:
        .long   4294967295                      # 0xffffffff
        .long   0                               # 0x0
        .long   0                               # 0x0
        .long   0                               # 0x0
.LCPI0_1:
        .long   1                               # 0x1
        .long   1                               # 0x1
        .long   1                               # 0x1
        .long   1                               # 0x1
.LCPI0_2:
        .long   3067833783                      # 0xb6db6db7
        .long   4294967295                      # 0xffffffff
        .long   1                               # 0x1
        .long   0                               # 0x0
.LCPI0_3:
        .long   2147483648                      # 0x80000000
        .zero   4
        .long   268435456                       # 0x10000000
        .zero   4
.LCPI0_4:
        .long   2147483648                      # 0x80000000
        .long   2147483648                      # 0x80000000
        .long   2147483648                      # 0x80000000
        .long   2147483648                      # 0x80000000
.LCPI0_5:
        .long   2454267026                      # 0x92492492
        .long   2147483649                      # 0x80000001
        .long   2415919103                      # 0x8fffffff
        .long   2147483647                      # 0x7fffffff
test_urem_even_allones_and_poweroftwo_and_one: # @test_urem_even_allones_and_poweroftwo_and_one
        pshufd  $245, %xmm0, %xmm1              # xmm1 = xmm0[1,1,3,3]
        pmuludq .LCPI0_0(%rip), %xmm1
        movdqa  .LCPI0_1(%rip), %xmm2           # xmm2 = [1,1,1,1]
        pmuludq %xmm2, %xmm1
        pshufd  $237, %xmm1, %xmm3              # xmm3 = xmm1[1,3,2,3]
        pmuludq .LCPI0_2(%rip), %xmm0
        pmuludq .LCPI0_3(%rip), %xmm0
        pshufd  $237, %xmm0, %xmm4              # xmm4 = xmm0[1,3,2,3]
        punpckldq       %xmm3, %xmm4            # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
        pshufd  $232, %xmm1, %xmm1              # xmm1 = xmm1[0,2,2,3]
        pshufd  $232, %xmm0, %xmm0              # xmm0 = xmm0[0,2,2,3]
        punpckldq       %xmm1, %xmm0            # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
        por     %xmm4, %xmm0
        pxor    .LCPI0_4(%rip), %xmm0
        pcmpgtd .LCPI0_5(%rip), %xmm0
        pandn   %xmm2, %xmm0
        retq
```

It should be possible to merge the pmuludq mul-by-constant pairs. Naturally we have to be careful due to the implicit zero-extension of the instruction, but in many of these cases at least one of the pairs of elements is multiply-by-one. PMULDQ possibly has similar cases but I haven't found any (the PMULUDQ cases appear due to an expansion of a mixture of v4i32 multiplies and rotates).

https://gcc.godbolt.org/z/nfq8K7sTT
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzdWNtu4zYQ_Rr5hbBBUdTtwQ_ZuAGC3S627S6wbwYl0TYbSVREypd8fYe0rMiO5HhbLIpWkGKS4pw5c4Y2OUlkdph_llqkPEO7jcg52sn6SZRrJEu00bpSjnfnkAe4a74VfKdmeb4tZrJew9DCJXEY-A5eOPjOCXB7227GV6LkyPHuKdoj4RHH-wU5FGuu9LKpebHkW14uWZ7LkqslK7NlJXe8liu9k7YL4w6JLgCI_90hMSplU-5EmSEn_HD0h8w7gwsuF6htvDW9N6MwglxqOqZFSUzjICSxfxpxg64Flj38tKgsvDAN_vzWg_Hbc4JPOEONM-Saa4v8wve6g3VbVONNy3NvJ9uj4QUNGGyTEi4Gs5PnKZoWuhZVzsHtPgqWAZ1OB-fOPt1_ecRLbFZC6_R4zSB1a_h8FRANXg7xEN6v2msYAw-bXmDgf9u41cId1cK9Cc39Txu3IpBRETwchJHnwXMNLQkyc4c_b1H9I0n-zrrwRiUhLg1p5AU0uoYW4eN1gfECP4tGkhHoIKKeT_3gepS3QLdh0J8Txv8AoxXIHxeI-pQEISYj2TiCxoTa-x1i8Q3ERlYvoa4fu7GLr34Fo6tfn45HeA0jPMP4se3du7MgP3wsOKNbqU2zygwdCurb3Zf4-6LAr033LXE7anZcO9P_4MJs83j28RcXPoomb7Jn1O2FcDCBbVZUcBjpuTk3KuQ2e2YIdZvGoBF5S4xYYj1Sx2eMVIszyqMvkBf2p3XNi1XS8vBOArktFyMOuUUgMhwrvmrk3WY0Ek0v3XQwGnqZ7ivRNGWVPuVA7YRgRRrxcIlPAR8bTHJvrfo9an333w3ktR8hGczXOwvatT6P0Y1EOOKjpyIe9IH7Kr7jY1BFd8TDMP5JKfesh89UdIdVlHXPKx1fT_vjxG7vu2kRwpF8rTPU7Qc3GcHPV9kxIqPz4PT-PHgSP_591EhtZJNnKOEQpVIigboN6oOC12tobHj3xYKPaXKYprJUmpUaGIhazdBnppsafmIPaMfRhm2tNYClrOarJkdZY0cMkiiqXKRCI3NQmEJxwksloDaUq-NrAK6bVMOQCSdpNAyhgpWHdoYyqIorxDTKOVMaCkt-srZ0TIfnvOClhlk1N6Q11CUHwxwmz9CXX799Wvx2CvUAjBVSohA5q1tw4_fRRgI0Qo1WUB5myLCAxBhPBuIbYLRcqoqDbRsmQ3wPqTmFxVAh9qCPZbmlplZrGQljCri11Az2K0j1rJ-X83J5naaztcwSmeu2Wn6Bp1w9Rx9D9fXrhM_dwI_8OPRxOMnmXhZ7MZtooXM-h_X8PQpgRaMHJnJDBWjWoJ6SqQDXXTiD-TW8S5lxNWnqfH7BSuhNk8xSCVXqg6nk249pVcs_eaqhK5RqTHAPfgAHy8lmjkOXuasUe7GPcYT5KglSL2MszfyQcS-d5CzhuTKsHUISlj7xMgOPJgTSrnJSACzPprLSkLgXdlwwBEKciDnBhOCQeG7ouTSYeZgTnyZhnEIBAa7gdMAL0KH7z8OknlvOSbNW8DIXSr_-W2ICKol1ya2KBp81eiPr-e8f_xCFLCc2vLmN7S_7Rkq6">