[llvm] AMDGPU: Use "countMaxActiveBits() <= 5" to define uint5Bits (PR #115543)
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 13:00:54 PST 2024
https://github.com/changpeng created https://github.com/llvm/llvm-project/pull/115543
countMaxTrailingOnes() is not correct. This patch follows the suggestion from https://github.com/llvm/llvm-project/pull/115372.
>From a2bacf8ab58af4c1a0247026ea131443d6066602 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <changpeng.fang at amd.com>
Date: Fri, 8 Nov 2024 12:55:28 -0800
Subject: [PATCH] AMDGPU: Use "countMaxActiveBits() <= 5" to define uint5Bits
countMaxTrailingOnes() is not correct. This patch follows the suggestion
from https://github.com/llvm/llvm-project/pull/115372.
---
llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +-
llvm/test/CodeGen/AMDGPU/extract-lowbits.ll | 25 +++++++++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0658e030ffa5d6..755cbb7fb65492 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3554,7 +3554,7 @@ def : AMDGPUPat <
>;
def uint5Bits : PatLeaf<(i32 VGPR_32:$width), [{
- return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxTrailingOnes() <= 5;
+ return CurDAG->computeKnownBits(SDValue(N, 0)).countMaxActiveBits() <= 5;
}]>;
// x << (bitwidth - y) >> (bitwidth - y)
diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
index 3de8db2c6a448e..0e5a68773a6ba8 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll
@@ -163,6 +163,31 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
ret i32 %masked
}
+define i32 @bzhi32_d0_even(i32 %val, i32 %numlowbits) nounwind {
+; SI-LABEL: bzhi32_d0_even:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
+; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
+; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
+; SI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: bzhi32_d0_even:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
+; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
+; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+ %times2 = shl i32 %numlowbits, 1
+ %numhighbits = sub i32 32, %times2
+ %highbitscleared = shl i32 %val, %numhighbits
+ %masked = lshr i32 %highbitscleared, %numhighbits
+ ret i32 %masked
+}
+
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI-LABEL: bzhi32_d1_indexzext:
; SI: ; %bb.0:
More information about the llvm-commits
mailing list