<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/56684>56684</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[X86] Failure to reassociate PMULUDQ mul-by-constant pairs of nodes
</td>
</tr>
<tr>
<th>Labels</th>
<td>
backend:X86,
missed-optimization
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
RKSimon
</td>
</tr>
</table>
<pre>
Noticed while working on https://reviews.llvm.org/D129765
```
define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
%urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
```
llc -mtriple=x86_64--
```
.LCPI0_0:
.long 4294967295 # 0xffffffff
.long 0 # 0x0
.long 0 # 0x0
.long 0 # 0x0
.LCPI0_1:
.long 1 # 0x1
.long 1 # 0x1
.long 1 # 0x1
.long 1 # 0x1
.LCPI0_2:
.long 3067833783 # 0xb6db6db7
.long 4294967295 # 0xffffffff
.long 1 # 0x1
.long 0 # 0x0
.LCPI0_3:
.long 2147483648 # 0x80000000
.zero 4
.long 268435456 # 0x10000000
.zero 4
.LCPI0_4:
.long 2147483648 # 0x80000000
.long 2147483648 # 0x80000000
.long 2147483648 # 0x80000000
.long 2147483648 # 0x80000000
.LCPI0_5:
.long 2454267026 # 0x92492492
.long 2147483649 # 0x80000001
.long 2415919103 # 0x8fffffff
.long 2147483647 # 0x7fffffff
test_urem_even_allones_and_poweroftwo_and_one: # @test_urem_even_allones_and_poweroftwo_and_one
pshufd $245, %xmm0, %xmm1 # xmm1 = xmm0[1,1,3,3]
pmuludq .LCPI0_0(%rip), %xmm1
movdqa .LCPI0_1(%rip), %xmm2 # xmm2 = [1,1,1,1]
pmuludq %xmm2, %xmm1
pshufd $237, %xmm1, %xmm3 # xmm3 = xmm1[1,3,2,3]
pmuludq .LCPI0_2(%rip), %xmm0
pmuludq .LCPI0_3(%rip), %xmm0
pshufd $237, %xmm0, %xmm4 # xmm4 = xmm0[1,3,2,3]
punpckldq %xmm3, %xmm4 # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
pshufd $232, %xmm1, %xmm1 # xmm1 = xmm1[0,2,2,3]
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
punpckldq %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
por %xmm4, %xmm0
pxor .LCPI0_4(%rip), %xmm0
pcmpgtd .LCPI0_5(%rip), %xmm0
pandn %xmm2, %xmm0
retq
```
It should be possible to merge the pmuludq mul-by-constant pairs. Naturally we have to be careful due to the implicit zero-extension of the instruction, but in many of these cases at least one of the pairs of elements is multiply-by-one. PMULDQ possibly has similar cases, but I haven't found any (the PMULUDQ cases appear due to an expansion of a mixture of v4i32 multiplies and rotates).
https://gcc.godbolt.org/z/nfq8K7sTT
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzdWNtu4zYQ_Rr5hbBBUdTtwQ_ZuAGC3S627S6wbwYl0TYbSVREypd8fYe0rMiO5HhbLIpWkGKS4pw5c4Y2OUlkdph_llqkPEO7jcg52sn6SZRrJEu00bpSjnfnkAe4a74VfKdmeb4tZrJew9DCJXEY-A5eOPjOCXB7227GV6LkyPHuKdoj4RHH-wU5FGuu9LKpebHkW14uWZ7LkqslK7NlJXe8liu9k7YL4w6JLgCI_90hMSplU-5EmSEn_HD0h8w7gwsuF6htvDW9N6MwglxqOqZFSUzjICSxfxpxg64Flj38tKgsvDAN_vzWg_Hbc4JPOEONM-Saa4v8wve6g3VbVONNy3NvJ9uj4QUNGGyTEi4Gs5PnKZoWuhZVzsHtPgqWAZ1OB-fOPt1_ecRLbFZC6_R4zSB1a_h8FRANXg7xEN6v2msYAw-bXmDgf9u41cId1cK9Cc39Txu3IpBRETwchJHnwXMNLQkyc4c_b1H9I0n-zrrwRiUhLg1p5AU0uoYW4eN1gfECP4tGkhHoIKKeT_3gepS3QLdh0J8Txv8AoxXIHxeI-pQEISYj2TiCxoTa-x1i8Q3ERlYvoa4fu7GLr34Fo6tfn45HeA0jPMP4se3du7MgP3wsOKNbqU2zygwdCurb3Zf4-6LAr033LXE7anZcO9P_4MJs83j28RcXPoomb7Jn1O2FcDCBbVZUcBjpuTk3KuQ2e2YIdZvGoBF5S4xYYj1Sx2eMVIszyqMvkBf2p3XNi1XS8vBOArktFyMOuUUgMhwrvmrk3WY0Ek0v3XQwGnqZ7ivRNGWVPuVA7YRgRRrxcIlPAR8bTHJvrfo9an333w3ktR8hGczXOwvatT6P0Y1EOOKjpyIe9IH7Kr7jY1BFd8TDMP5JKfesh89UdIdVlHXPKx1fT_vjxG7vu2kRwpF8rTPU7Qc3GcHPV9kxIqPz4PT-PHgSP_591EhtZJNnKOEQpVIigboN6oOC12tobHj3xYKPaXKYprJUmpUaGIhazdBnppsafmIPaMfRhm2tNYClrOarJkdZY0cMkiiqXKRCI3NQmEJxwksloDaUq-NrAK6bVMOQCSdpNAyhgpWHdoYyqIorxDTKOVMaCkt-srZ0TIfnvOClhlk1N6Q11CUHwxwmz9CXX799Wvx2CvUAjBVSohA5q1tw4_fRRgI0Qo1WUB5myLCAxBhPBuIbYLRcqoqDbRsmQ3wPqTmFxVAh9qCPZbmlplZrGQljCri11Az2K0j1rJ-X83J5naaztcwSmeu2Wn6Bp1w9Rx9D9fXrhM_dwI_8OPRxOMnmXhZ7MZtooXM-h_X8PQpgRaMHJnJDBWjWoJ6SqQDXXTiD-TW8S5lxNWnqfH7BSuhNk8xSCVXqg6nk249pVcs_eaqhK5RqTHAPfgAHy8lmjkOXuasUe7GPcYT5KglSL2MszfyQcS-d5CzhuTKsHUISlj7xMgOPJgTSrnJSACzPprLSkLgXdlwwBEKciDnBhOCQeG7ouTSYeZgTnyZhnEIBAa7gdMAL0KH7z8OknlvOSbNW8DIXSr_-W2ICKol1ya2KBp81eiPr-e8f_xCFLCc2vLmN7S_7Rkq6">