[llvm] [AMDGPU] Implement hasBitTest to Optimize Bit Testing Operations (PR #112652)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 16 01:24:13 PST 2024
https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/112652
>From f0880b2ce9f5a3046d81467ffd92c7f8123c69f2 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Oct 2024 11:25:11 +0800
Subject: [PATCH 1/3] [AMDGPU] Implement hasBitTest to Optimize Bit Testing
Operations
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 21 +++++++++++++++++++
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 ++
2 files changed, 23 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3cc4bd92f6471a..5d29c7a030b663 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6035,3 +6035,24 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
}
+
+bool AMDGPUTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ if (X->isDivergent() || Y->isDivergent())
+ return false;
+
+ EVT VT = X.getValueType();
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return false;
+
+ auto *ConstantMaskNode = dyn_cast<ConstantSDNode>(Y);
+ if (!ConstantMaskNode)
+ return false;
+
+ APInt MaskValue = ConstantMaskNode->getAPIntValue();
+
+ if (!MaskValue.isPowerOf2())
+ return false;
+
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 33991239a41209..02753646777cd7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -387,6 +387,8 @@ class AMDGPUTargetLowering : public TargetLowering {
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
+
+ bool hasBitTest(SDValue X, SDValue Y) const override;
};
namespace AMDGPUISD {
>From c09c0bb21eea14b5d7ade795731589f1a357d78b Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Fri, 18 Oct 2024 18:55:18 +0800
Subject: [PATCH 2/3] [AMDGPU] Move to SIISelLowering.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 21 ----------------
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 --
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 25 +++++++++++++++++++
llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 +
4 files changed, 26 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5d29c7a030b663..3cc4bd92f6471a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6035,24 +6035,3 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
}
-
-bool AMDGPUTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
- if (X->isDivergent() || Y->isDivergent())
- return false;
-
- EVT VT = X.getValueType();
-
- if (VT != MVT::i32 && VT != MVT::i64)
- return false;
-
- auto *ConstantMaskNode = dyn_cast<ConstantSDNode>(Y);
- if (!ConstantMaskNode)
- return false;
-
- APInt MaskValue = ConstantMaskNode->getAPIntValue();
-
- if (!MaskValue.isPowerOf2())
- return false;
-
- return true;
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 02753646777cd7..33991239a41209 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -387,8 +387,6 @@ class AMDGPUTargetLowering : public TargetLowering {
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
-
- bool hasBitTest(SDValue X, SDValue Y) const override;
};
namespace AMDGPUISD {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b186dafb4c0ded..229009fd60cf68 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17024,3 +17024,28 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
AI->eraseFromParent();
return LI;
}
+
+bool SITargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ if (X->isDivergent() || Y->isDivergent())
+ return false;
+
+ EVT VT = X.getValueType();
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return false;
+
+ if (VT.isVector()) {
+ EVT ScalarType = VT.getScalarType();
+ if (ScalarType != MVT::i32 && ScalarType != MVT::i64)
+ return false;
+ }
+
+ auto *IsConstOrIsConstSplat = dyn_cast<ConstantSDNode>(Y);
+ if (!dyn_cast<ConstantSDNode>(Y))
+ return false;
+
+ if (!IsConstOrIsConstSplat->getAPIntValue().isPowerOf2())
+ return false;
+
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 32e110fdfa84d4..8d48c0aaeaef7b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -601,6 +601,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
MachineMemOperand::Flags
getTargetMMOFlags(const Instruction &I) const override;
+ bool hasBitTest(SDValue X, SDValue Y) const override;
};
// Returns true if argument is a boolean value which is not serialized into
>From fe675c987e94b63caf9246d22059da30e2cbe60f Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 16 Nov 2024 17:07:50 +0800
Subject: [PATCH 3/3] [AMDGPU] Add lit test for hasBitTest.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +-
llvm/test/CodeGen/AMDGPU/bittest.ll | 166 ++++++++++++++++++++++
2 files changed, 167 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/bittest.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 229009fd60cf68..98a65a4cf61736 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17031,7 +17031,7 @@ bool SITargetLowering::hasBitTest(SDValue X, SDValue Y) const {
EVT VT = X.getValueType();
- if (VT != MVT::i32 && VT != MVT::i64)
+ if (VT != MVT::i32)
return false;
if (VT.isVector()) {
@@ -17040,12 +17040,8 @@ bool SITargetLowering::hasBitTest(SDValue X, SDValue Y) const {
return false;
}
- auto *IsConstOrIsConstSplat = dyn_cast<ConstantSDNode>(Y);
if (!dyn_cast<ConstantSDNode>(Y))
return false;
- if (!IsConstOrIsConstSplat->getAPIntValue().isPowerOf2())
- return false;
-
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/bittest.ll b/llvm/test/CodeGen/AMDGPU/bittest.ll
new file mode 100644
index 00000000000000..f4dd7a77413441
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bittest.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+define i32 @bittest_5_i32(i32 inreg %a) {
+; GCN-LABEL: bittest_5_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_bitcmp0_b32 s0, 5
+; GCN-NEXT: s_cselect_b32 s0, -1, 0
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %shr = lshr i32 %a, 5
+ %not = xor i32 %shr, -1
+ %and = and i32 %not, 1
+ ret i32 %and
+}
+
+define i32 @bittest_7_i32(i32 inreg %a) {
+; GCN-LABEL: bittest_7_i32:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_bitcmp0_b32 s0, 7
+; GCN-NEXT: s_cselect_b32 s0, -1, 0
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %shr = lshr i32 %a, 7
+ %not = xor i32 %shr, -1
+ %and = and i32 %not, 1
+ ret i32 %and
+}
+
+define i32 @bittest_11_i32(i32 inreg %a) {
+; GCN-LABEL: bittest_11_i32:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_bitcmp0_b32 s0, 11
+; GCN-NEXT: s_cselect_b32 s0, -1, 0
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %shr = lshr i32 %a, 11
+ %not = xor i32 %shr, -1
+ %and = and i32 %not, 1
+ ret i32 %and
+}
+
+define i32 @bittest_31_i32(i32 inreg %a) {
+; GCN-LABEL: bittest_31_i32:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_not_b32 s0, s0
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GCN-NEXT: s_lshr_b32 s0, s0, 31
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %shr = lshr i32 %a, 31
+ %not = xor i32 %shr, -1
+ %and = and i32 %not, 1
+ ret i32 %and
+}
+
+define i64 @bittest_7_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_7_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_and_not1_b32 s0, 1, s1
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %shr = lshr i64 %a, 32
+ %not = xor i64 %shr, -1
+ %and = and i64 %not, 1
+ ret i64 %and
+}
+
+define i64 @bittest_10_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_10_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_and_not1_b32 s0, 1, s1
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %shr = lshr i64 %a, 32
+ %not = xor i64 %shr, -1
+ %and = and i64 %not, 1
+ ret i64 %and
+}
+
+define i64 @bittest_11_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_11_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_and_not1_b32 s0, 1, s1
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %shr = lshr i64 %a, 32
+ %not = xor i64 %shr, -1
+ %and = and i64 %not, 1
+ ret i64 %and
+}
+
+define i64 @bittest_31_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_31_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_and_not1_b32 s0, 1, s1
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %shr = lshr i64 %a, 32
+ %not = xor i64 %shr, -1
+ %and = and i64 %not, 1
+ ret i64 %and
+}
+
+define i64 @bittest_32_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_32_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_and_not1_b32 s0, 1, s1
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %shr = lshr i64 %a, 32
+ %not = xor i64 %shr, -1
+ %and = and i64 %not, 1
+ ret i64 %and
+}
+
+define i64 @bittest_63_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_63_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_and_not1_b32 s0, 1, s1
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %shr = lshr i64 %a, 32
+ %not = xor i64 %shr, -1
+ %and = and i64 %not, 1
+ ret i64 %and
+}
+
+; (and (srl (not X), C), 1)
+define i32 @bittest_srl_not(i32 inreg %a) {
+; GCN-LABEL: bittest_srl_not:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_bitcmp0_b32 s0, 5
+; GCN-NEXT: s_cselect_b32 s0, -1, 0
+; GCN-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %not = xor i32 %a, -1
+ %shr = lshr i32 %not, 5
+ %and = and i32 %shr, 1
+ ret i32 %and
+}
More information about the llvm-commits
mailing list