<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/100293>100293</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[InstCombine] Missed optimization after #84628
</td>
</tr>
<tr>
<th>Labels</th>
<td>
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
cyyself
</td>
</tr>
</table>
<pre>
## Statement
I found a C++ code pattern that is no longer optimized after #84628. The pattern is widely used in C++ code generated by [Verilator](https://github.com/verilator/verilator.git) when it consumes [CIRCT](https://github.com/llvm/circt)-generated Verilog code for RTL circuit simulation.
Without the vector-operation optimization, the generated code is longer. Even worse, when simulating large RTL designs, the branch predictors in current CPUs usually do not work well because of their small size. We should make this pattern generate code like the original, using vector operations, to avoid so many hard-to-predict branches.
## Reduced reproducer
TL;DR: https://godbolt.org/z/3qK3jeo1E
Look at the following C++ code:
```cpp
struct a_struct {
unsigned int value;
unsigned int some_cond;
/* selector A */
unsigned int index;
unsigned int value_0;
unsigned int value_1;
unsigned int value_2;
unsigned int value_3;
/* selector B */
unsigned int value_0_b;
unsigned int value_1_b;
unsigned int value_2_b;
unsigned int value_3_b;
bool use_0;
bool use_1;
bool use_2;
bool use_3;
};
void some_func(a_struct &a) {
if (a.some_cond) {
a.value = ( a.use_0 ? a.value_0_b : 0) |
( a.use_1 ? a.value_1_b : 0) |
( a.use_2 ? a.value_2_b : 0) |
( a.use_3 ? a.value_3_b : 0);
}
else {
a.value = ( 0U == a.index ? a.value_0 : 0) |
( 1U == a.index ? a.value_1 : 0) |
( 2U == a.index ? a.value_2 : 0) |
( 3U == a.index ? a.value_3 : 0);
}
}
```
Compile (on x86-64 target): `clang++ -O3 -S -c -mllvm --jump-is-expensive test.cpp`
Before commit 56b3222b79632a4bbb36271735556a03b2504791, we get asm like this, which uses vector operations throughout:
```asm
_Z9some_funcR8a_struct: # @_Z9some_funcR8a_struct
.cfi_startproc
# %bb.0:
cmpl $0, 4(%rdi)
je .LBB0_3
# %bb.1:
movd 44(%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
punpcklbw %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
punpcklwd %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3]
pxor %xmm1, %xmm1
pcmpeqb %xmm0, %xmm1
movdqu 28(%rdi), %xmm0
pandn %xmm0, %xmm1
pshufd $238, %xmm1, %xmm0 # xmm0 = xmm1[2,3,2,3]
por %xmm1, %xmm0
pshufd $85, %xmm0, %xmm1 # xmm1 = xmm0[1,1,1,1]
por %xmm0, %xmm1
movd %xmm1, %eax
movl %eax, (%rdi)
retq
.LBB0_3:
movdqu 8(%rdi), %xmm0
movdqu 12(%rdi), %xmm1
pshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
pcmpeqd .LCPI0_0(%rip), %xmm0
pand %xmm1, %xmm0
pshufd $238, %xmm0, %xmm1 # xmm1 = xmm0[2,3,2,3]
por %xmm0, %xmm1
pshufd $85, %xmm1, %xmm0 # xmm0 = xmm1[1,1,1,1]
por %xmm1, %xmm0
movd %xmm0, %eax
movl %eax, (%rdi)
retq
```
However, after commit 56b3222b79632a4bbb36271735556a03b2504791, we get bad code like this:
```asm
_Z9some_funcR8a_struct: # @_Z9some_funcR8a_struct
.cfi_startproc
# %bb.0:
cmpl $0, 4(%rdi)
je .LBB0_8
# %bb.1:
xorl %ecx, %ecx
cmpb $0, 44(%rdi)
movl $0, %edx
jne .LBB0_2
# %bb.3:
cmpb $0, 45(%rdi)
movl $0, %eax
jne .LBB0_4
.LBB0_5:
orl %edx, %eax
cmpb $0, 46(%rdi)
je .LBB0_7
.LBB0_6:
movl 36(%rdi), %ecx
.LBB0_7:
orl %ecx, %eax
movzbl 47(%rdi), %edx
movl $40, %ecx
xorl %esi, %esi
testb %dl, %dl
je .LBB0_17
.LBB0_16:
movl (%rdi,%rcx), %esi
.LBB0_17:
orl %esi, %eax
movl %eax, (%rdi)
retq
.LBB0_8:
movl 8(%rdi), %ecx
xorl %edx, %edx
movl $0, %esi
testl %ecx, %ecx
je .LBB0_9
# %bb.10:
movl $0, %eax
cmpl $1, %ecx
je .LBB0_11
.LBB0_12:
orl %esi, %eax
cmpl $2, %ecx
jne .LBB0_14
.LBB0_13:
movl 20(%rdi), %edx
.LBB0_14:
orl %edx, %eax
cmpl $3, %ecx
sete %dl
movl $24, %ecx
xorl %esi, %esi
testb %dl, %dl
jne .LBB0_16
jmp .LBB0_17
.LBB0_2:
movl 28(%rdi), %edx
cmpb $0, 45(%rdi)
movl $0, %eax
je .LBB0_5
.LBB0_4:
movl 32(%rdi), %eax
orl %edx, %eax
cmpb $0, 46(%rdi)
jne .LBB0_6
jmp .LBB0_7
.LBB0_9:
movl 12(%rdi), %esi
movl $0, %eax
cmpl $1, %ecx
jne .LBB0_12
.LBB0_11:
movl 16(%rdi), %eax
orl %esi, %eax
cmpl $2, %ecx
je .LBB0_13
jmp .LBB0_14
```
I tried reverting commit 56b3222b79632a4bbb36271735556a03b2504791 on top of the recent main-branch commit a51d2632827bffa0fe8f21a1c8807985413155a3, and that fixed the problem.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzcWV9vozoW_zTuy1EibENIHvLQpLfa0c5qr-bO3SvtS2XgJPGMwQw2aTqffmUgBAIk7d2rfdho3FL88-_8P4cMwhi5zxDXJNiQ4OlBlPagi3X89mZQ7R4inbytCeOEcfjNCospZpZ4T8R7rH9-gp0uswQEbAnbELaBWCcIubAWiwxSaQwmoHMrU_lTWKkzEDuLBRDGl_6CLcEehAVp4FUmqN6gdHiZAQk2_8JCKmF1QYInwpYHa3ND-CNhz4Q976U9lNE81ilhz8cW2bme76UlbAV7zLAQFpOeigZeDzI-QKwzU6bo5G0_fdl-vStLqaP7FcsivqKvJOt97YGdLuDL18_gcKW0YGRaqsoB867__pD2oEsLEcpsf_YTJtXxI8ZWF6Bzxy91Zgjbgj1gLUAaUDrbYzGHX46YwasuDDrE6wGzVly2ByWKPTpd2vNRIbL4AHmBiaxEyKzmLYsCMwvbX383UJpSKPUGiUwywkLrBHyv9JLWgEmFUmDkT5zDHwjmoEuVQCq-I9iDNG0CWN06qFZbyQqCoAu5l5lQoDOE0jhNBwa74-KoZQJGQyqyNziIIplZPWt0b0xB03Nqk7BfMCljTKDAvNDusuiCvn4mfPP0hfBHuIq2TiKt7FwXe8KefxL2zH_8nX9DTX_pnv-s9XcQtrJlp5XSr86Eboo5xq5WC6_-F-d5fcfYoowtiJfmgoSbegMAoMyqynTFYOEoVImET20bneJLrLOkB6nMeQSDqvbrIxBW2VgjegwyS_A0LaCS_-LdA9B7AHYPwG9asOlbMKHlS3RXz_sQdh_Cu5BIa-Wa15WT2tt0_DYbv31xAwmfLtfVz6YeUnzZlVlM2PKSP2whXEvq5ZHcgcPMLzlyjXAfMa9sAsKfHBzEvLIFCH8-7znHgisXr2bYQp_i8rkQ0B4B_TgB6xGwjxPwHgHvEPQzLXy6_IHK4H0Xeb-7S_eXmFfl03cWjGrqDtKbB-n4wVET2U0m9gEmfpOJwx2vXS7OTa6bsVud5lKhE6QzOC0Xs4UP1o0lWzE-guuKSmT7pnvO_slh9hvMYpilbt7CbPatTPOZNDM85ZgZeUSwaOzc9dK-sA3udIHNnExTaSFYRJwxFoWrBWfCj6KIL1hIQx4EwUJ4PGKB54crWg1PhFepFOzRgjDpeVxJA27ejY-pqT4vTFrfefn3qq3XL225OruHoeBAfG_iQC9-83gnX4wVhc0LHbeDDwgLomjutUqd8XGaKyDM95yZPmFLwoIikS4APdw3bPg_bzbeC78mpgPiVB8T8PuMbOvgpzT1Rk2sNlyypZiSYONVD13bn1joq1-1oLzM8vi7il5biop6QkpPQIVzEtjWLdos1izeLL9ZQbMWzQqrFTz1LW70eU2m9fm4EgMhJ11cyOmFnJ69Eqc5_oiG0ukwPj9KABef0RCd-USWZGPWXPHl5lDuEofzGV92YVNRv3YGJcHmbDgbN762fYR5Updl0EVdtBpPP9oNDO0Ehd7R5bafhzqjOA1gqoG5vQo1qMUC7Y_68lyGY1XnononqH00ZePo6Qh7o04dVPWtdK_XwKtV9iYw_7z99ZP34jWKyfyWGS5FP5ITvfycToqxnPhIfr6rVrr5OV0qo7Xykfyc9ks_P72_KD9HB_7f9CsesXCHH6tv-__tJIZIJL3vj9L8P0_d5f2pe9KFqmIUn86RjE8D2VFtXiP-hvxL1NuSx-TM9y3DjnLsWrlhcxpIDkYkj4m8Tsa-ZL_bEoOB1LNHktMU3UCvxc2I1ILCrtTFWCOuzOCLkd7axuRM1Z52yp7LLJ5UONXHn5EC8MMx8mSydP2LS6-z4tRIdntGnlFG9lHu4TqqUYlqQIma8BDtuYhO-6hjxNZdOMNX1yq0nGPxHWr-p_pX87kes8tJzcfG7MW1XZ9esu9GeLxb
fleDtLiOYb9brAbdYtiHbhZb1aTqTfo-kZT2YsXG83o6SB2JbFJi1uZXr_Dp6MNQRce8W2XSko0qe7NrnJXlU8oatAhtvQwaHPP_B-XY65V0cbWb5t3dXsV2wtd6cjThrzP6XX2-G6Obvb6XYUFXQX-664490Q6o3x3n-9Oh7-WbTu75eDVuwugT-SD87xmVN4v4KjdYryBGv9JXXHR0prWS_8Jab0ud30xb_8ZD5yc4iCOCLSQmYDUU7hHU_pn_CIJIVG-t6rciBcaYWUiFzM5vTho-EdCELThbsjDa7YS3w-WOUUHj5dILV8vAp5wGgeDNe5cMpIWdPGEyf0jWPFnxlXjANQ0ZDVdsEdKHwzpM_CAIvUAgUvSiHUOKfBEG8YouQ74KH-Saecz3QuZ73Pc5nfsB33mhjxRXQRTvEuJ7mAqp5kod07ku9g_SmBLX1PPYij8oEaEy59d9xdqhZlG5N8T3lDTWXM5ZaVX1YvBTZuxWp5HMkARP8I-7L_UeykKt779Dq2Tnhf6GsSXsudLTEPbcqHpcs_8EAAD__5Bzc04">