[llvm] AMDGPU: Disable pattern matching from x&(-1>>(32-y) to "bfe x, 0, y" (PR #116115)

Wed Nov 13 14:39:19 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

<details>
<summary>Changes</summary>

  It is not correct to lower x&(-1>>(32-y) to "bfe x, 0, y". When y equals 32, "-1" is not shifted, so x&(-1>>(32-32) is still x, but "bfe x, 0, 32" is 0.

---
Full diff: https://github.com/llvm/llvm-project/pull/116115.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (-6) 
- (modified) llvm/test/CodeGen/AMDGPU/extract-lowbits.ll (+30-10) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 5f4cca0645b0ef..2831d0339df6b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3550,12 +3550,6 @@ def : AMDGPUPat <
   (V_BFE_U32_e64 $src, (i32 0), $width)
 >;
 
-// x & (-1 >> (bitwidth - y))
-def : AMDGPUPat <
-  (DivergentBinFrag<and> i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
-  (V_BFE_U32_e64 $src, (i32 0), $width)
->;
-
 // SHA-256 Ma patterns
 
 // ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y
diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
index 7f1f7133d69919..cc2ef0c48f1152 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
@@ -99,11 +99,21 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; ---------------------------------------------------------------------------- ;
 
 define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
-; GCN-LABEL: bzhi32_c0:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SI-LABEL: bzhi32_c0:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
+; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
+; SI-NEXT:    v_and_b32_e32 v0, v1, v0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: bzhi32_c0:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
+; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
+; VI-NEXT:    v_and_b32_e32 v0, v1, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
   %masked = and i32 %mask, %val
@@ -134,11 +144,21 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 }
 
 define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
-; GCN-LABEL: bzhi32_c4_commutative:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; SI-LABEL: bzhi32_c4_commutative:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
+; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
+; SI-NEXT:    v_and_b32_e32 v0, v0, v1
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: bzhi32_c4_commutative:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
+; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
+; VI-NEXT:    v_and_b32_e32 v0, v0, v1
+; VI-NEXT:    s_setpc_b64 s[30:31]
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
   %masked = and i32 %val, %mask ; swapped order

``````````

</details>


https://github.com/llvm/llvm-project/pull/116115