[llvm] AMDGPU: Don't avoid clamp of bit shift in BFE pattern (PR #115372)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 13:07:10 PST 2024
================
@@ -150,22 +150,14 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; ---------------------------------------------------------------------------- ;
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
-; SI-LABEL: bzhi32_d0:
-; SI: ; %bb.0:
-; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
-; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
-; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
-; SI-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-LABEL: bzhi32_d0:
-; VI: ; %bb.0:
-; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
-; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
-; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
-; VI-NEXT: s_setpc_b64 s[30:31]
- %numhighbits = sub i32 32, %numlowbits
+; GCN-LABEL: bzhi32_d0:
----------------
arsenm wrote:
InstCombine does change this, producing:
```
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) #0 {
%numlow5bits = and i32 %numlowbits, 31
%numhighbits = sub nuw nsw i32 32, %numlow5bits
%1 = lshr i32 -1, %numhighbits
%masked = and i32 %1, %val
ret i32 %masked
}
```
Code generation for this already produces a BFE (although we fail to optimize out the clamp, which is a separate issue).
https://github.com/llvm/llvm-project/pull/115372
More information about the llvm-commits mailing list