<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/63549">63549</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[LoongArch] CSR/IOCSR reads are mis-optimized
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
xen0n
</td>
</tr>
</table>
<pre>
Found while investigating https://github.com/ClangBuiltLinux/boot-utils/issues/108#issuecomment-1608457335. The cause is the incorrect cleanup in commit 2efdacf74c54, which removed the side-effect markers from the CSR/IOCSR read intrinsics.
<details><blockquote>
The involved code:
```c
// arch/loongarch/kernel/traps.c
static void init_restore_fp(void)
{
if (!used_math()) {
/* First time FP context user. */
init_fpu();
} else {
/* This task has formerly used the FP context */
if (!is_fpu_owner())
own_fpu_inatomic(1);
}
BUG_ON(!is_fp_enabled());
}
```
Faulty generated code:
```
9000000000223d40 <init_restore_fp>:
9000000000223d40: 63 c0 ff 02 addi.d $sp, $sp, -16
9000000000223d44: 61 20 c0 29 st.d $ra, $sp, 8
9000000000223d48: 76 00 c0 29 st.d $fp, $sp, 0
9000000000223d4c: 44 00 c0 28 ld.d $a0, $tp, 0
9000000000223d50: 85 54 00 2a ld.bu $a1, $a0, 21
9000000000223d54: a5 80 40 03 andi $a1, $a1, 32
9000000000223d58: 16 08 00 04 csrrd $fp, 2
9000000000223d5c: a0 4c 00 44 bnez $a1, 76 <init_restore_fp+0x68>
9000000000223d60: 84 00 0c 24 ldptr.w $a0, $a0, 3072
9000000000223d64: c5 06 80 03 ori $a1, $fp, 1
9000000000223d68: 25 08 00 04 csrwr $a1, 2
9000000000223d6c: 45 20 c0 02 addi.d $a1, $tp, 8
9000000000223d70: 06 00 84 03 ori $a2, $zero, 256
9000000000223d74: a0 98 6c 38 amor_db.d $zero, $a2, $a1
9000000000223d78: 45 00 c0 28 ld.d $a1, $tp, 0
9000000000223d7c: a5 20 c0 28 ld.d $a1, $a1, 8
9000000000223d80: a6 e8 3f 26 ldptr.d $a2, $a1, 16360
9000000000223d84: c6 04 80 03 ori $a2, $a2, 1
9000000000223d88: a6 e8 3f 27 stptr.d $a2, $a1, 16360
9000000000223d8c: 00 14 59 54 bl 22804 <_init_fpu>
9000000000223d90: 44 00 c0 28 ld.d $a0, $tp, 0
9000000000223d94: 45 00 00 14 lu12i.w $a1, 2
9000000000223d98: 86 50 80 28 ld.w $a2, $a0, 20
9000000000223d9c: c5 14 15 00 or $a1, $a2, $a1
9000000000223da0: 85 50 80 29 st.w $a1, $a0, 20
9000000000223da4: 00 68 00 50 b 104 <init_restore_fp+0xcc>
9000000000223da8: 44 20 c0 28 ld.d $a0, $tp, 8
9000000000223dac: 84 00 44 03 andi $a0, $a0, 256
9000000000223db0: 80 5c 00 44 bnez $a0, 92 <init_restore_fp+0xcc>
9000000000223db4: 44 db 05 1a pcalau12i $a0, 11994
9000000000223db8: 84 00 d4 02 addi.d $a0, $a0, 1280
9000000000223dbc: 84 40 00 2a ld.bu $a0, $a0, 16
9000000000223dc0: 84 20 40 03 andi $a0, $a0, 8
9000000000223dc4: 80 48 00 40 beqz $a0, 72 <init_restore_fp+0xcc>
9000000000223dc8: 44 20 c0 28 ld.d $a0, $tp, 8
9000000000223dcc: 84 00 44 03 andi $a0, $a0, 256
9000000000223dd0: 80 3c 00 44 bnez $a0, 60 <init_restore_fp+0xcc>
9000000000223dd4: c4 06 80 03 ori $a0, $fp, 1
9000000000223dd8: 24 08 00 04 csrwr $a0, 2
9000000000223ddc: 44 20 c0 02 addi.d $a0, $tp, 8
9000000000223de0: 05 00 84 03 ori $a1, $zero, 256
9000000000223de4: 80 94 6c 38 amor_db.d $zero, $a1, $a0
9000000000223de8: 44 00 c0 28 ld.d $a0, $tp, 0
9000000000223dec: 84 20 c0 28 ld.d $a0, $a0, 8
9000000000223df0: 85 e8 3f 26 ldptr.d $a1, $a0, 16360
9000000000223df4: a5 04 80 03 ori $a1, $a1, 1
9000000000223df8: 85 e8 3f 27 stptr.d $a1, $a0, 16360
9000000000223dfc: 44 00 c0 28 ld.d $a0, $tp, 0
9000000000223e00: 84 fc df 02 addi.d $a0, $a0, 2047
9000000000223e04: 84 04 d0 02 addi.d $a0, $a0, 1025
9000000000223e08: 00 c0 57 54 bl 22464 <_restore_fp>
9000000000223e0c: c4 06 40 03 andi $a0, $fp, 1
9000000000223e10: 80 14 00 40 beqz $a0, 20 <init_restore_fp+0xe4>
9000000000223e14: 76 00 c0 28 ld.d $fp, $sp, 0
9000000000223e18: 61 20 c0 28 ld.d $ra, $sp, 8
9000000000223e1c: 63 40 c0 02 addi.d $sp, $sp, 16
9000000000223e20: 20 00 00 4c ret
9000000000223e24: 01 00 2a 00 break 1
```
Note that there is only one `csrrd`, which means the `BUG_ON` must be checking the *previous* value of `CSR.EUEN`, i.e. the value read before the `csrwr`.
Adding the context:
```c
// arch/loongarch/include/asm/loongarch.h
#define read_csr_euen() csr_read32(LOONGARCH_CSR_EUEN)
#define write_csr_euen(val) csr_write32(val, LOONGARCH_CSR_EUEN)
// expanded from __BUILD_CSR_OP(euen)
static inline unsigned long
set_csr_euen(unsigned long set)
{
unsigned long res, new;
res = read_csr_euen();
new = res | set;
write_csr_euen(new);
return res;
}
// arch/loongarch/include/asm/fpu.h
#define enable_fpu() set_csr_euen(CSR_EUEN_FPEN)
static inline int is_fp_enabled(void)
{
return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_FPEN) ?
1 : 0;
}
static inline void __own_fpu(void)
{
enable_fpu();
set_thread_flag(TIF_USEDFPU);
KSTK_EUEN(current) |= CSR_EUEN_FPEN;
}
static inline void init_fpu(void)
{
unsigned int fcsr = current->thread.fpu.fcsr;
__own_fpu();
_init_fpu(fcsr);
set_used_math();
}
```
We can then arrive at the minimal reproducer below.
</blockquote></details>
Minimal reproducer:
```c
void bug(void);
void foo(int flag)
{
if (flag)
(void) __builtin_loongarch_csrwr_w(__builtin_loongarch_csrrd_w(0x2) | 0x1, 0x2);
if (!(__builtin_loongarch_csrrd_w(0x2) & 0x1))
bug();
}
```
Correct codegen:
```
foo: # @foo
# %bb.0: # %entry
beqz $a0, .LBB0_2
# %bb.1: # %if.then
csrrd $a0, 2
ori $a0, $a0, 1
csrwr $a0, 2
.LBB0_2: # %if.end
csrrd $a0, 2
andi $a0, $a0, 1
bnez $a0, .LBB0_4
# %bb.3: # %if.then2
b %plt(bug)
.LBB0_4: # %if.end3
ret
```
Wrong codegen:
```
foo: # @foo
# %bb.0:
csrrd $a1, 2
beqz $a0, .LBB0_2
# %bb.1:
ori $a0, $a1, 1
csrwr $a0, 2
.LBB0_2:
andi $a0, $a1, 1
bnez $a0, .LBB0_4
# %bb.3:
b %plt(bug)
.LBB0_4:
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJysWltzozjT_jXKTVdcQoiDL3KROOP9pna-nak51HtJyaKx9QaDV4g4M7_-LQmwAYOT7Gxqa4eD9Kj76UctqbGoKrUtEO9I8ECCxxtRm12p716woMXNpkx_3q3LukghrbUqtqCKZ6yM2gqjygLKDHbGHCri3xO2Jmy9VWZXbxay3BO2XuWi2D7UKjefVFG_ELbelKW5rY3KK8LWqqpqtBcejQnz3a0s93sszK0X0pgHke8HC_hoQFWQ1gimBLNDOOqy2ILMURT1wdpgeykDDLNUyCziMuBgdsKAxn35jClUKsVbzDKUBvZCP6GuINPlHlbfvhK2_vh59e0raBQpqMJoVVRKVgtCHwm9b__vr1I0whrufyD-apOX8unvujRo712T7zu07JS5HVCWKVpS-hAhbf6T7b0jDISWO8LWeVkW2_b6CXWBOWFro8WhWrTtKyOMkvBcKmulMonGypQak-xAWGwfE7ZsoaOH5gLaP5UBYTFhXl1hmuyF2bnbJWFLODd2Ft3DWunKgFF7hPUXkGVh8MVAXaFeAGHO7gH4aRBrVHaoW2h_ZAOJHgHzCuHCulOLZvzvO1WBEdUT7EQFWan3qPOf1oDUhb9n1FVzOp9VZa1KymOB-uR226c8Fu6lKoQp90oSFnszxvdjCQ8__kg-_9XDT7AQmxzT8wAdxLlrJ4A-0lrUufkJWyxQC_OKcprbJe3-GPNTToH4q7EgrEjvp5sT_x5CHySFLAPKAECkqVqkln9eHQhbnS9uvXAShDsQDxi1OGwJAJVZpLajFgOEeLJ_bPtHIdBRfycCng2NmHZbWgjOO4gYAPL0BCFoC2HmIQJHRRxA4FCYaCA2dQvhtRANFvMmMRwTIoCYAqdAfUtnkaohgLvw2SSAo8ILgcbWCMoBQFZap30qpns6BgQFLm1PbntuCvwFffOjcEod7IG-hPEpcw1xw4YWxwmVwLij5WD04jhgtrnwaTRpXeiIkQHQ0HLjiCm1AhiR2zg4yW3oqGHBiJqj7kNMD96II2j1ORb5aXAzL9HIsUCdRC0Xnf22O2u7_0JdOhuCyVkS8TZAyxhCCb6VqNiXOkk3jU4bPzqUPrKYJCSKW7dmNO-9rvlItnpl1yGai0lmYseMCAFj8DNg4Ukf6dgHG9nQn85bcSOQ0Ma1L5ABBJsVRxwPrYhcCnm_FY4PSsHjECxtJgDY5E1sGIspt9MnOa1s0zNmSX87Fy35ObSNNQB57THVTrp5rS8dEXEIAbU8doMfu8HZMItNjy7byepx8JwNNhZDQVyXpjjl0saKNqMfL3R1xQrB21iEbr4H1opNO0-8JhJTiUzKmbCIuA3LjNbp61lAyHMu5LyX3kcQrVvTeWDTcEMhmE7Uru-Svdu_DW_9SzdAA_Ds-nWQIhdWN2dkz1su-WT_-Oxcyi-z5NA5j8WTUdt0FHE6uYqOUCYZkt2Sw0ar6CTGZKQkb0nmTjzciQf_HpIcvZ9k-fsikv-GiNJORP4VEYWTe8Gr_qVNGuZz6zR9fZ1Om3Waz63TdDZ3pXJA7qwCr5CLzTodTK3T3tvWaeyks-RvXad76WwKMP7t9QDleUZcg7gyI7IuJc-s095oYs6skFm3w51Yp4e7hUlxZPHQivE6_VYrfnu7j7RLMpmE9PLkM56GlEeTKLybzBzSK5Jt_aEsmESJ26VOUgii8baDh822Y3iem4KR58l7LW_OT170urzi8fm8yWbzip07k6Z5fHjEGwXsDUc89OLhKXME8YaDJnqyPe3yyQQzOu1OLk7IHEOMtlszLkGjmWzY7F-8dhl0u6iNRvFk9y9XSgB_lQbB7FAjYVEFZZH_hLJAICF1J0ESUjjulNzBHkVRuRoICWlbgggp7OvKwAZB7lA-qWLbtGD3B43Pqqwrwu7hWeQ1QpnZnqtvXxcffnywfQdF
rvs07Xq3BZZ_XMJShczrFAlbi2rff7fYdV39FDNVoCu7JbLSCdZYNOUTu3wk9rnPCIs_ff781x_3X1f_l6y-fU2c4adC1wnlqJXBPsyzyDsk985BuYcruIbYdwxfDqJIMW1qhUny8OPjp0fX5_MXwuJmpOWgPKeK3JpTF66qmkJeFtu2AZq-fYMWUKGZr94Nm2qsrA8FHi-qVBorIP7jFKUXbQs8tm0rINHKWXBqc8GmHW0CRKOpdeFMuix3vVMi2aG-FEdTVTtXFWHEYhe_ZP3lIojDiKjCwLhSd71o2jpHWPy6HIGwEMbGAPFnapMeuFQxR9rQclfxTZK2VPmq2WPKzkGz3JmdE0eWiy1h8feP6-THtw-P6y8_psL757fvf7YexrLWGgvTVIxXVjlDd9_uSq9OPOPJSfA2aJmstFNqa8Et8T80XiysZOzr8-AD8_ucDdxLeia4_hO-W7LG1fK3lXT_gyBFYdNoAUJr9YwgjMuqe1WovchB40GXaS1Rwwbz8thlYX9F2Hr4acE96n166I3z_xdor6Rrx_6m3vaIHxLnGmRlSVjsmHciuYhOU1gfvBzr-zwCJMmmVrlRRXKa_Ik7HSRHwuKZtzp1b-kLa_UG9MXtFJsnM_E-VfzfDMzCBrj3RaDh5-3RXpVaozSubr_F4nrp3nLr3wNhPhBO7V2X74CwYLNZ0NNrFmBh9M-hhxc7s8WnhweasDGM14NR2cJKcQjUKzEPzmfd-6kjYLulbRvOnfA6i_wLUVyKpLMPi_R95s0fnzvzLs7GjV18zJT_LkMtkSNTNqdGwSE3hMVOQMs-GXwYDSxS_2KluZZPms-d_5q-5qn2Jqh-j-TeoKH-SbFnwitSekP0vX8a_XE4XwnkeyJ3k9756dJfihu888I4iiPq0-hmdycxCzGLGWZc8DiKl0vMAhrFKcuiTKbiRt0xynwasshjgcejBY9Dnm5oHEShxHC5JJziXqh8kefP-0Wptzfu-_ld6Ad8eZOLDeaV-6TPmN3puZeEMRI83ug72-d2U28rwmmuKlOdUYwyufstwCebNu_tVi14vPhMXoHQdjmrbsuDUXv1C9ObWud3V34MYEdo_7k96PK_KE3_NwDO7v8FAAD__8TlJik">