[llvm] [AMDGPU] Implement hasBitTest to Optimize Bit Testing Operations (PR #112652)

Harrison Hao via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 16 02:00:38 PST 2024


https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/112652

>From f0880b2ce9f5a3046d81467ffd92c7f8123c69f2 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Oct 2024 11:25:11 +0800
Subject: [PATCH 1/6] [AMDGPU] Implement hasBitTest to Optimize Bit Testing
 Operations

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 21 +++++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  2 ++
 2 files changed, 23 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3cc4bd92f6471a..5d29c7a030b663 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6035,3 +6035,24 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
                                                Register N0, Register N1) const {
   return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
 }
+
+bool AMDGPUTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+  if (X->isDivergent() || Y->isDivergent())
+    return false;
+
+  EVT VT = X.getValueType();
+
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  auto *ConstantMaskNode = dyn_cast<ConstantSDNode>(Y);
+  if (!ConstantMaskNode)
+    return false;
+
+  APInt MaskValue = ConstantMaskNode->getAPIntValue();
+
+  if (!MaskValue.isPowerOf2())
+    return false;
+
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 33991239a41209..02753646777cd7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -387,6 +387,8 @@ class AMDGPUTargetLowering : public TargetLowering {
   MVT getFenceOperandTy(const DataLayout &DL) const override {
     return MVT::i32;
   }
+
+  bool hasBitTest(SDValue X, SDValue Y) const override;
 };
 
 namespace AMDGPUISD {

>From c09c0bb21eea14b5d7ade795731589f1a357d78b Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Fri, 18 Oct 2024 18:55:18 +0800
Subject: [PATCH 2/6] [AMDGPU] Move to SIISelLowering.

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 21 ----------------
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  2 --
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 25 +++++++++++++++++++
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  1 +
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5d29c7a030b663..3cc4bd92f6471a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6035,24 +6035,3 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
                                                Register N0, Register N1) const {
   return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
 }
-
-bool AMDGPUTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
-  if (X->isDivergent() || Y->isDivergent())
-    return false;
-
-  EVT VT = X.getValueType();
-
-  if (VT != MVT::i32 && VT != MVT::i64)
-    return false;
-
-  auto *ConstantMaskNode = dyn_cast<ConstantSDNode>(Y);
-  if (!ConstantMaskNode)
-    return false;
-
-  APInt MaskValue = ConstantMaskNode->getAPIntValue();
-
-  if (!MaskValue.isPowerOf2())
-    return false;
-
-  return true;
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 02753646777cd7..33991239a41209 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -387,8 +387,6 @@ class AMDGPUTargetLowering : public TargetLowering {
   MVT getFenceOperandTy(const DataLayout &DL) const override {
     return MVT::i32;
   }
-
-  bool hasBitTest(SDValue X, SDValue Y) const override;
 };
 
 namespace AMDGPUISD {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b186dafb4c0ded..229009fd60cf68 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17024,3 +17024,28 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   AI->eraseFromParent();
   return LI;
 }
+
+bool SITargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+  if (X->isDivergent() || Y->isDivergent())
+    return false;
+
+  EVT VT = X.getValueType();
+
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return false;
+
+  if (VT.isVector()) {
+    EVT ScalarType = VT.getScalarType();
+    if (ScalarType != MVT::i32 && ScalarType != MVT::i64)
+      return false;
+  }
+
+  auto *IsConstOrIsConstSplat = dyn_cast<ConstantSDNode>(Y);
+  if (!dyn_cast<ConstantSDNode>(Y))
+    return false;
+
+  if (!IsConstOrIsConstSplat->getAPIntValue().isPowerOf2())
+    return false;
+
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 32e110fdfa84d4..8d48c0aaeaef7b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -601,6 +601,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
 
   MachineMemOperand::Flags
   getTargetMMOFlags(const Instruction &I) const override;
+  bool hasBitTest(SDValue X, SDValue Y) const override;
 };
 
 // Returns true if argument is a boolean value which is not serialized into

>From fe675c987e94b63caf9246d22059da30e2cbe60f Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 16 Nov 2024 17:07:50 +0800
Subject: [PATCH 3/6] [AMDGPU] Add lit test for hasBitTest.

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |   6 +-
 llvm/test/CodeGen/AMDGPU/bittest.ll       | 166 ++++++++++++++++++++++
 2 files changed, 167 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/bittest.ll

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 229009fd60cf68..98a65a4cf61736 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17031,7 +17031,7 @@ bool SITargetLowering::hasBitTest(SDValue X, SDValue Y) const {
 
   EVT VT = X.getValueType();
 
-  if (VT != MVT::i32 && VT != MVT::i64)
+  if (VT != MVT::i32)
     return false;
 
   if (VT.isVector()) {
@@ -17040,12 +17040,8 @@ bool SITargetLowering::hasBitTest(SDValue X, SDValue Y) const {
       return false;
   }
 
-  auto *IsConstOrIsConstSplat = dyn_cast<ConstantSDNode>(Y);
   if (!dyn_cast<ConstantSDNode>(Y))
     return false;
 
-  if (!IsConstOrIsConstSplat->getAPIntValue().isPowerOf2())
-    return false;
-
   return true;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/bittest.ll b/llvm/test/CodeGen/AMDGPU/bittest.ll
new file mode 100644
index 00000000000000..f4dd7a77413441
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bittest.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+define i32 @bittest_5_i32(i32 inreg %a) {
+; GCN-LABEL: bittest_5_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp0_b32 s0, 5
+; GCN-NEXT:    s_cselect_b32 s0, -1, 0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %shr = lshr i32 %a, 5
+  %not = xor i32 %shr, -1
+  %and = and i32 %not, 1
+  ret i32 %and
+}
+
+define i32 @bittest_7_i32(i32 inreg %a) {
+; GCN-LABEL: bittest_7_i32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp0_b32 s0, 7
+; GCN-NEXT:    s_cselect_b32 s0, -1, 0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %shr = lshr i32 %a, 7
+  %not = xor i32 %shr, -1
+  %and = and i32 %not, 1
+  ret i32 %and
+}
+
+define i32 @bittest_11_i32(i32 inreg %a) {
+; GCN-LABEL: bittest_11_i32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp0_b32 s0, 11
+; GCN-NEXT:    s_cselect_b32 s0, -1, 0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %shr = lshr i32 %a, 11
+  %not = xor i32 %shr, -1
+  %and = and i32 %not, 1
+  ret i32 %and
+}
+
+define i32 @bittest_31_i32(i32 inreg %a) {
+; GCN-LABEL: bittest_31_i32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_not_b32 s0, s0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GCN-NEXT:    s_lshr_b32 s0, s0, 31
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %shr = lshr i32 %a, 31
+  %not = xor i32 %shr, -1
+  %and = and i32 %not, 1
+  ret i32 %and
+}
+
+define i64 @bittest_7_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_7_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_and_not1_b32 s0, 1, s1
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %shr = lshr i64 %a, 32
+  %not = xor i64 %shr, -1
+  %and = and i64 %not, 1
+  ret i64 %and
+}
+
+define i64 @bittest_10_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_10_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_and_not1_b32 s0, 1, s1
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %shr = lshr i64 %a, 32
+  %not = xor i64 %shr, -1
+  %and = and i64 %not, 1
+  ret i64 %and
+}
+
+define i64 @bittest_11_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_11_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_and_not1_b32 s0, 1, s1
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %shr = lshr i64 %a, 32
+  %not = xor i64 %shr, -1
+  %and = and i64 %not, 1
+  ret i64 %and
+}
+
+define i64 @bittest_31_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_31_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_and_not1_b32 s0, 1, s1
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %shr = lshr i64 %a, 32
+  %not = xor i64 %shr, -1
+  %and = and i64 %not, 1
+  ret i64 %and
+}
+
+define i64 @bittest_32_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_32_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_and_not1_b32 s0, 1, s1
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %shr = lshr i64 %a, 32
+  %not = xor i64 %shr, -1
+  %and = and i64 %not, 1
+  ret i64 %and
+}
+
+define i64 @bittest_63_i64(i64 inreg %a) {
+; GCN-LABEL: bittest_63_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_and_not1_b32 s0, 1, s1
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %shr = lshr i64 %a, 32
+  %not = xor i64 %shr, -1
+  %and = and i64 %not, 1
+  ret i64 %and
+}
+
+; and (srl (not X), C)), 1
+define i32 @bittest_srl_not(i32 inreg %a) {
+; GCN-LABEL: bittest_srl_not:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_bitcmp0_b32 s0, 5
+; GCN-NEXT:    s_cselect_b32 s0, -1, 0
+; GCN-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+    %not = xor i32 %a, -1
+    %shr = lshr i32 %not, 5
+    %and = and i32 %shr, 1
+    ret i32 %and
+}

>From 3df7755e17874a11a27433bfd020e0a1a3329f60 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 16 Nov 2024 17:38:55 +0800
Subject: [PATCH 4/6] [AMDGPU] Update hasBitTest.

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 98a65a4cf61736..f0660ea6790e9c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17029,18 +17029,12 @@ bool SITargetLowering::hasBitTest(SDValue X, SDValue Y) const {
   if (X->isDivergent() || Y->isDivergent())
     return false;
 
-  EVT VT = X.getValueType();
-
-  if (VT != MVT::i32)
+  EVT ScalarType = X.getValueType().getScalarType();
+  
+  if (ScalarType != MVT::i32)
     return false;
 
-  if (VT.isVector()) {
-    EVT ScalarType = VT.getScalarType();
-    if (ScalarType != MVT::i32 && ScalarType != MVT::i64)
-      return false;
-  }
-
-  if (!dyn_cast<ConstantSDNode>(Y))
+  if (!isConstOrConstSplat(Y))
     return false;
 
   return true;

>From 9ac8aa5bb5ec0fee84d1807cd01d4ea8ef1612e6 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 16 Nov 2024 17:48:03 +0800
Subject: [PATCH 5/6] [AMDGPU] Update lit test comment.

---
 llvm/test/CodeGen/AMDGPU/bittest.ll | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/bittest.ll b/llvm/test/CodeGen/AMDGPU/bittest.ll
index f4dd7a77413441..456ea7607fe3b1 100644
--- a/llvm/test/CodeGen/AMDGPU/bittest.ll
+++ b/llvm/test/CodeGen/AMDGPU/bittest.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
 
+; Test pattern: (and (not (srl X, C)), 1)
 define i32 @bittest_5_i32(i32 inreg %a) {
 ; GCN-LABEL: bittest_5_i32:
 ; GCN:       ; %bb.0:
@@ -148,7 +149,7 @@ define i64 @bittest_63_i64(i64 inreg %a) {
   ret i64 %and
 }
 
-; and (srl (not X), C)), 1
+; Test pattern: (and (lshr (not X), C), 1)
 define i32 @bittest_srl_not(i32 inreg %a) {
 ; GCN-LABEL: bittest_srl_not:
 ; GCN:       ; %bb.0: ; %entry

>From b848149fa4dcd19ad413414632ef0c1856e0814f Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 16 Nov 2024 18:00:15 +0800
Subject: [PATCH 6/6] [AMDGPU] Fix clang format issue.

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f0660ea6790e9c..fd34b1e473cc6f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17030,7 +17030,7 @@ bool SITargetLowering::hasBitTest(SDValue X, SDValue Y) const {
     return false;
 
   EVT ScalarType = X.getValueType().getScalarType();
-  
+
   if (ScalarType != MVT::i32)
     return false;
 



More information about the llvm-commits mailing list