[llvm] AMDGPU: Use "countMaxActiveBits() <= 5" to define uint5Bits (PR #115543)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 13:01:30 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Changpeng Fang (changpeng)
<details>
<summary>Changes</summary>
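countMaxTrailingOnes() is not the correct check for uint5Bits: it bounds only the length of the run of trailing one bits, not the number of bits needed to represent the value. This patch uses countMaxActiveBits() <= 5 instead, following the suggestion from https://github.com/llvm/llvm-project/pull/115372.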
---
Full diff: https://github.com/llvm/llvm-project/pull/115543.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/extract-lowbits.ll (+25)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0658e030ffa5d6..755cbb7fb65492 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3554,7 +3554,7 @@ def : AMDGPUPat <
>;
def uint5Bits : PatLeaf<(i32 VGPR_32:$width), [{
- return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxTrailingOnes() <= 5;
+ return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxActiveBits() <= 5;
}]>;
// x << (bitwidth - y) >> (bitwidth - y)
diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
index 3de8db2c6a448e..0e5a68773a6ba8 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
@@ -163,6 +163,31 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
ret i32 %masked
}
+define i32 @bzhi32_d0_even(i32 %val, i32 %numlowbits) nounwind {
+; SI-LABEL: bzhi32_d0_even:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
+; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
+; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
+; SI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: bzhi32_d0_even:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
+; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
+; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+ %times2 = shl i32 %numlowbits, 1
+ %numhighbits = sub i32 32, %times2
+ %highbitscleared = shl i32 %val, %numhighbits
+ %masked = lshr i32 %highbitscleared, %numhighbits
+ ret i32 %masked
+}
+
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI-LABEL: bzhi32_d1_indexzext:
; SI: ; %bb.0:
``````````
</details>
https://github.com/llvm/llvm-project/pull/115543
More information about the llvm-commits
mailing list