<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/76049>76049</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[EarlyCSE] Cannot merge consecutive bitfield accesses made through pointers
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
adamszilagyi
</td>
</tr>
</table>
<pre>
When consecutive bitfields are set, the `EarlyCSE` pass is able to identify and remove the load/store pairs that access the individual bitfields one by one, and then merge the `and` instructions that set the bitfield values. However, if the bitfields are accessed through a pointer, the optimization fails to remove the redundant load/store pairs.
Take a look at the following example:
```.C
struct bits {
unsigned b0 : 1;
unsigned b1 : 1;
unsigned b2 : 2;
unsigned b4 : 2;
unsigned b6 : 1;
unsigned b7 : 1;
};
struct bits b;
void clear_bits(void) {
b.b0 =
b.b1 =
b.b2 =
b.b4 =
b.b6 =
b.b7 = 0;
}
```
For this code the IR before the `EarlyCSE` pass is the following:
```
define dso_local arm_aapcscc void @clear_bits() #0 {
entry:
%bf.load = load i8, ptr @b, align 4
%bf.clear = and i8 %bf.load, 127
%bf.set = or i8 %bf.clear, 0
store i8 %bf.set, ptr @b, align 4
%bf.load1 = load i8, ptr @b, align 4
%bf.clear2 = and i8 %bf.load1, -65
%bf.set3 = or i8 %bf.clear2, 0
store i8 %bf.set3, ptr @b, align 4
%bf.load4 = load i8, ptr @b, align 4
%bf.clear5 = and i8 %bf.load4, -49
%bf.set6 = or i8 %bf.clear5, 0
store i8 %bf.set6, ptr @b, align 4
%bf.load7 = load i8, ptr @b, align 4
%bf.clear8 = and i8 %bf.load7, -13
%bf.set9 = or i8 %bf.clear8, 0
store i8 %bf.set9, ptr @b, align 4
%bf.load10 = load i8, ptr @b, align 4
%bf.clear11 = and i8 %bf.load10, -3
%bf.set12 = or i8 %bf.clear11, 0
store i8 %bf.set12, ptr @b, align 4
%bf.load13 = load i8, ptr @b, align 4
%bf.clear14 = and i8 %bf.load13, -2
%bf.set15 = or i8 %bf.clear14, 0
store i8 %bf.set15, ptr @b, align 4
ret void
}
```
Here, each `load` reads the value from `b`, the `and`/`or` pair sets an individual bitfield, and the `store` writes the result back to `b`. But if I do the same thing through a pointer to the `bits` struct, the IR is the following (shown after the C code):
```.C
struct bits {
unsigned b0 : 1;
unsigned b1 : 1;
unsigned b2 : 2;
unsigned b4 : 2;
unsigned b6 : 1;
unsigned b7 : 1;
};
struct bits *b;
void clear_bits(void) {
b->b0 =
b->b1 =
b->b2 =
b->b4 =
b->b6 =
b->b7 = 0;
}
```
```
define dso_local arm_aapcscc void @clear_bits() #0 {
entry:
%0 = load ptr, ptr @b, align 4, !tbaa !5
%bf.load = load i8, ptr %0, align 4
%bf.clear = and i8 %bf.load, 127
%bf.set = or i8 %bf.clear, 0
store i8 %bf.set, ptr %0, align 4
%1 = load ptr, ptr @b, align 4, !tbaa !5
%bf.load1 = load i8, ptr %1, align 4
%bf.clear2 = and i8 %bf.load1, -65
%bf.set3 = or i8 %bf.clear2, 0
store i8 %bf.set3, ptr %1, align 4
%2 = load ptr, ptr @b, align 4, !tbaa !5
%bf.load4 = load i8, ptr %2, align 4
%bf.clear5 = and i8 %bf.load4, -49
%bf.set6 = or i8 %bf.clear5, 0
store i8 %bf.set6, ptr %2, align 4
%3 = load ptr, ptr @b, align 4, !tbaa !5
%bf.load7 = load i8, ptr %3, align 4
%bf.clear8 = and i8 %bf.load7, -13
%bf.set9 = or i8 %bf.clear8, 0
store i8 %bf.set9, ptr %3, align 4
%4 = load ptr, ptr @b, align 4, !tbaa !5
%bf.load10 = load i8, ptr %4, align 4
%bf.clear11 = and i8 %bf.load10, -3
%bf.set12 = or i8 %bf.clear11, 0
store i8 %bf.set12, ptr %4, align 4
%5 = load ptr, ptr @b, align 4, !tbaa !5
%bf.load13 = load i8, ptr %5, align 4
%bf.clear14 = and i8 %bf.load13, -2
%bf.set15 = or i8 %bf.clear14, 0
store i8 %bf.set15, ptr %5, align 4
ret void
}
```
Here, the first `load` of each group loads the pointer stored in `b`, and the second `load` reads the value from that address; the rest is the same. In these cases the `EarlyCSE` pass fails to recognize that the intermediate value `load`s can be removed along with the `store`s, so the result is this rather large machine code (I tested it on different architectures; this example is ARM):
```
clear_bits: // @clear_bits
adrp x8, b
ldr x9, [x8, :lo12:b]
ldrb w10, [x9]
and w10, w10, #0xffffffdf
strb w10, [x9]
ldr x9, [x8, :lo12:b]
ldrb w10, [x9]
and w10, w10, #0xffffffef
strb w10, [x9]
ldr x9, [x8, :lo12:b]
ldrb w10, [x9]
and w10, w10, #0xfffffff7
strb w10, [x9]
ldr x9, [x8, :lo12:b]
ldrb w10, [x9]
and w10, w10, #0xfffffffb
strb w10, [x9]
ldr x9, [x8, :lo12:b]
ldrb w10, [x9]
and w10, w10, #0xfffffffd
strb w10, [x9]
ldr x8, [x8, :lo12:b]
ldrb w9, [x8]
and w9, w9, #0xfe
strb w9, [x8]
ret
```
GCC, by contrast, merges the consecutive bitfield sets:
```
clear_bits:
adrp x0, .LANCHOR0
ldr x1, [x0, #:lo12:.LANCHOR0]
ldrb w0, [x1]
and w0, w0, -64
strb w0, [x1]
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzUWV2P4joS_TXmpdQocT6ABx4Yetjb0n5IvSvt48iJK-A7xmZtp-meX7-yQyBAoOle7t6Z1mgC5aryOeWKfUiYtWKpEKck-0KyxwGr3UqbKeNsbX8IyZZvYlBo_jb99woVlFpZLGsnXhAK4SqBkltgBsGiI3QOboVA8ugrM_Jt_s-vJI9gw6wFYYEVEsFpEByVE9UbMMW9vwKDa_2CIVZqxgldWKcNwoYJY2G7EuUKWFmitcFHKC5eBK-Z7GDQCqF485cdDAVrNEtsETHFPRihrDN16YRWu8QWXZO2zQUvTNZofZqidiCqo9GGbYMGPX6j6-UKNlooh6Ytgd44sRY_BAsTVUxI66l3iBrkteJMuR7KQxI9kmjW_P8v9h2BgdT6OzAXYistpd4KtQR8ZeuNRJLMuiEkj5p_w3ljaDh7DhbI6EtjBKhVWHsORQQkmUFMkp6x-MoYDWP0MHYYSk-HOmP5lZSjkzEyejx8PmNTnIy9aMGhlMjMNz9O6NhbCJ143tDOVgwD48euIT410FNDemrITw2jYIiOsB-vSBfrQhtwK2Gh1LxpiqdnKLDyfXDlPjrqgEsr33zlWAmFwK3-JnXJJDCz_sbYprRlCaFUJI2OqhUqRZPo0CaonHnbTwNAaFZUQ9-0gWz4IMa-8TfO-HyF_8ykWCpIj4PCTCHK3_pi3MnlY2I6Ova36IK3NgfnkMN7R61vc-PsHXYb0btg_Kzx5yjQfg6xj3rIszMSST8L-g6N5GYe6ed4ZP080sAjnZzxyPt5ZO_wyG_mMbqdR2f-cT-NUaARJ2c0Jv00xl0a5ywmt3dV9LnliOMLfRUFJudEYtrPJI7fWZGY3k4m-SSZ9AKZ0NUP9GgZPaTsApf0PS7ZdVAGXdjqbtmRf0ODHRUTNqY8Aia37M3CslUKQSBAZfTaexU-Sas5_DC0eoMutPHxFp3zpzVTfdolQG6kkA8MDEMUe0ELBSu_7wSDraXzKmI35xC-NPJEANfBxbK1Pzn8sUDn8HttHdTWi4edOvHBu1nCdu8nCWdpy_npGbZCSijwRGcQOt4KtwrWeTivCJ38PKrjs7Ljs6qD0NknhccDSb6eao9gi3tstMeW9tiOdEhj-pgS-f_Ihs62uHHm0l1L50Bo7ArG_DW7UXbQLPp5ZMdFMPF9KtAvW2gW_2Sy5SIiep869MsemtGrcuGPUD3XRE8vHm9O7lOFftFEs-R6N_wRqumqbLqIKL3TXdEvu2iW3kV23VV19YIiNMvuVIp-0Uaz7C6i7b6qrR_VR2UbICtXUAljXVe6BTmzxOa5CePcoLWgq1ZDHUSbxVIrDt1Yg4yfCT63Yq5N1Momg9a1v8y9BhvCU0hqEUpm0V78Nd95MFTqpRI_sMnfPOVyaNbIBXMtgAM2CyVTXqY1T5Q4MKnV0gqOsNdpHS0ZkFrd1ZEHnScs_KcWDqEQS1izcuXP_vBIgtCxQ-uQe5VZaQNcVBUaVA6YKVfCYelqg7s6CNs-jvK1mD3_7YpEbL52JETi239B6OJEWuy6oflj3Gz89TW0dHE8KLkJ19ew45DsS-NFkpnUMSXJrCDZ41lI4a_b5lb3MZMzJ38zdJxaX5pEr1X449VxgHU3ZP3T0OIvhbYa_VJoi18KLf9f0I4_jrZD8CLY4LPzDFBxr7Dez2LQXTklnpR1yLjf_bd-k_3LfA5cN7vXGs3S_871G2TfO47wluADm9kO137DChUd_nX29_lv_3iOLpQ0bpm1K3Wo6iF0z3lf1v1qxZfL2rRAI2jy9MK6X0l0XtkBnyZ8kkzYAKfxKEqi0SiPo8FqWhUF5XHOYhaziEUZH08YTnI2zuMRTTkbiCmNaBJTGsVJlGfRsEiiST6KxuVokkXVuCBphGsm5FDKl_VQm-VAWFvjdJRH6WQgWYHShtdVlCrcQhgk
lJLscWCmPuahqJeWpJEU1tlDFiecDO-59udw9ghzpkBpt3tZ1Lv2u5c9FoQK57nvoN0TFTuojZyunNuEVQ8n2FK4VV0MS70mdOEn310eNkb_jqUjdBEgW0IXgdJ_AwAA__8pbYp3">