[llvm] [AMDGPU] Implement hasAndNot for scalar bitwise AND-NOT operations. (PR #112647)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 17 22:28:12 PDT 2024
https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/112647
>From 1822dab3f3a4b3634bece0edd095c14b8f502a56 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Oct 2024 09:59:03 +0800
Subject: [PATCH 1/6] [AMDGPU] Implement hasAndNot for scalar bitwise AND-NOT
operations.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 8 ++++++++
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 ++
2 files changed, 10 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 0f65df0763cc83..b746b94a60be21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3721,6 +3721,14 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
return DAG.getBuildVector(VT, DL, Args);
}
+bool AMDGPUTargetLowering::hasAndNot(SDValue Op) const {
+ if (Op->isDivergent())
+ return false;
+
+ EVT VT = Op.getValueType();
+ return VT == MVT::i32 || VT == MVT::i64;
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index b2fd31cb2346eb..1289458570358b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -99,6 +99,8 @@ class AMDGPUTargetLowering : public TargetLowering {
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ bool hasAndNot(SDValue Y) const override;
+
protected:
bool shouldCombineMemoryType(EVT VT) const;
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
>From 43a26e7424299296ec1aaa1067d99a3185c4d294 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Oct 2024 10:31:26 +0800
Subject: [PATCH 2/6] [AMDGPU] Update value name.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 1289458570358b..d05b8901c0cb6e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -99,7 +99,7 @@ class AMDGPUTargetLowering : public TargetLowering {
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- bool hasAndNot(SDValue Y) const override;
+ bool hasAndNot(SDValue Op) const override;
protected:
bool shouldCombineMemoryType(EVT VT) const;
>From b873ad212233965cf7054fe13ca43696957cd2cb Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Oct 2024 12:07:09 +0800
Subject: [PATCH 3/6] [AMDGPU] Update patch.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 16 ++++++++--------
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 9 +++++++--
2 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index b746b94a60be21..09860cfff96fc2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3721,14 +3721,6 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
return DAG.getBuildVector(VT, DL, Args);
}
-bool AMDGPUTargetLowering::hasAndNot(SDValue Op) const {
- if (Op->isDivergent())
- return false;
-
- EVT VT = Op.getValueType();
- return VT == MVT::i32 || VT == MVT::i64;
-}
-
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
@@ -6051,3 +6043,11 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
}
+
+bool AMDGPUTargetLowering::hasAndNot(SDValue Op) const {
+ if (Op->isDivergent())
+ return false;
+
+ EVT VT = Op.getValueType();
+ return VT == MVT::i32 || VT == MVT::i64;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index d05b8901c0cb6e..952c95eba63760 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -99,8 +99,6 @@ class AMDGPUTargetLowering : public TargetLowering {
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- bool hasAndNot(SDValue Op) const override;
-
protected:
bool shouldCombineMemoryType(EVT VT) const;
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -389,6 +387,13 @@ class AMDGPUTargetLowering : public TargetLowering {
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
+
+ /// Return true if the target supports a bitwise and-not operation:
+ /// X = ~A & B
+ /// This function checks if the operation can be directly mapped to the
+ /// target's native instructions, potentially simplifying select or other
+ /// related instructions by using more efficient hardware-specific operations.
+ bool hasAndNot(SDValue Op) const override;
};
namespace AMDGPUISD {
>From e75dde769d705388720e6d967d51ed319d87cc13 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Oct 2024 12:54:13 +0800
Subject: [PATCH 4/6] [AMDGPU] Move to SIISelLowering.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 8 --------
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 7 -------
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 ++++++++
llvm/lib/Target/AMDGPU/SIISelLowering.h | 7 +++++++
4 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 09860cfff96fc2..0f65df0763cc83 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6043,11 +6043,3 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
}
-
-bool AMDGPUTargetLowering::hasAndNot(SDValue Op) const {
- if (Op->isDivergent())
- return false;
-
- EVT VT = Op.getValueType();
- return VT == MVT::i32 || VT == MVT::i64;
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 952c95eba63760..b2fd31cb2346eb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -387,13 +387,6 @@ class AMDGPUTargetLowering : public TargetLowering {
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
-
- /// Return true if the target supports a bitwise and-not operation:
- /// X = ~A & B
- /// This function checks if the operation can be directly mapped to the
- /// target's native instructions, potentially simplifying select or other
- /// related instructions by using more efficient hardware-specific operations.
- bool hasAndNot(SDValue Op) const override;
};
namespace AMDGPUISD {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index de9173e923ab5c..f59f25c7117491 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16890,3 +16890,11 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
AI->eraseFromParent();
return LI;
}
+
+bool SITargetLowering::hasAndNot(SDValue Op) const {
+ if (Op->isDivergent())
+ return false;
+
+ EVT VT = Op.getValueType();
+ return VT == MVT::i32 || VT == MVT::i64;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 6c3edf37945e24..b2094ad7993316 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -598,6 +598,13 @@ class SITargetLowering final : public AMDGPUTargetLowering {
MachineMemOperand::Flags
getTargetMMOFlags(const Instruction &I) const override;
+
+ /// Return true if the target supports a bitwise and-not operation:
+ /// X = ~A & B
+ /// This function checks if the operation can be directly mapped to the
+ /// target's native instructions, potentially simplifying select or other
+ /// related instructions by using more efficient hardware-specific operations.
+ bool hasAndNot(SDValue Op) const override;
};
// Returns true if argument is a boolean value which is not serialized into
>From 1417d7158afe057e2f1e1a00d2ff2ca2302a90f6 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 17 Oct 2024 14:22:34 +0800
Subject: [PATCH 5/6] [AMDGPU] Update comments.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 ++
llvm/lib/Target/AMDGPU/SIISelLowering.h | 6 ------
2 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f59f25c7117491..660731e9ce446d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16892,6 +16892,8 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
}
bool SITargetLowering::hasAndNot(SDValue Op) const {
+ // Return false if the operation is divergent, as AND-NOT optimization
+ // requires uniform behavior across threads.
if (Op->isDivergent())
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index b2094ad7993316..a07ee1641d9ae2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -598,12 +598,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
MachineMemOperand::Flags
getTargetMMOFlags(const Instruction &I) const override;
-
- /// Return true if the target supports a bitwise and-not operation:
- /// X = ~A & B
- /// This function checks if the operation can be directly mapped to the
- /// target's native instructions, potentially simplifying select or other
- /// related instructions by using more efficient hardware-specific operations.
bool hasAndNot(SDValue Op) const override;
};
>From 2c9aa28f7548634817ba18871bbcf42d6885c4b3 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Fri, 18 Oct 2024 13:21:18 +0800
Subject: [PATCH 6/6] [AMDGPU] Add a lit test for hasAndNot.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 87 +++++++++++++++++++++--
llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 +
llvm/test/CodeGen/AMDGPU/andornot.ll | 39 ++++++++++
3 files changed, 122 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/andornot.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 660731e9ce446d..b78168131edc13 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6822,6 +6822,81 @@ static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
}
}
+SDValue SITargetLowering::combineAnd(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ const unsigned Opc = Op.getOpcode();
+ assert(Opc == ISD::AND);
+
+ auto &DAG = DCI.DAG;
+ SDLoc DL(Op);
+
+ if(hasAndNot(Op)) {
+ SDValue LHS = Op->getOperand(0);
+ SDValue RHS = Op->getOperand(1);
+
+ // (and LHS, (or Y, ~Z))
+ if (RHS.getOpcode() == ISD::OR && RHS.hasOneUse()) {
+ SDValue Y = RHS->getOperand(0);
+ SDValue NotZ = RHS->getOperand(1);
+
+ if (NotZ.getOpcode() == ISD::XOR && isAllOnesConstant(NotZ->getOperand(1))) {
+ SDValue Z = NotZ->getOperand(0);
+
+ if (!isa<ConstantSDNode>(Y)) {
+ SDValue NotY = DAG.getNOT(DL, Y, Y.getValueType());
+ SDValue AndNotYZ = DAG.getNode(ISD::AND, DL, Y.getValueType(), NotY, Z);
+ SDValue NotAndNotYZ = DAG.getNOT(DL, AndNotYZ, AndNotYZ.getValueType());
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, Op.getValueType(), LHS, NotAndNotYZ);
+ return NewAnd;
+ }
+ }
+ }
+ }
+
+ EVT OpTy = (Opc != ISD::SETCC) ? Op.getValueType()
+ : Op->getOperand(0).getValueType();
+ auto ExtTy = OpTy.changeElementType(MVT::i32);
+
+ if (DCI.isBeforeLegalizeOps() ||
+ isNarrowingProfitable(Op.getNode(), ExtTy, OpTy))
+ return SDValue();
+
+ SDValue LHS;
+ SDValue RHS;
+ if (Opc == ISD::SELECT) {
+ LHS = Op->getOperand(1);
+ RHS = Op->getOperand(2);
+ } else {
+ LHS = Op->getOperand(0);
+ RHS = Op->getOperand(1);
+ }
+
+ const unsigned ExtOp = getExtOpcodeForPromotedOp(Op);
+ LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS});
+
+ // Special case: for shifts, the RHS always needs a zext.
+ if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
+ RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS});
+ else
+ RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS});
+
+  // setcc always returns an i1 (or i1 vector), so there is no need to truncate after.
+ if (Opc == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ return DAG.getSetCC(DL, Op.getValueType(), LHS, RHS, CC);
+ }
+
+ // For other ops, we extend the operation's return type as well so we need to
+ // truncate back to the original type.
+ SDValue NewVal;
+ if (Opc == ISD::SELECT)
+ NewVal = DAG.getNode(ISD::SELECT, DL, ExtTy, {Op->getOperand(0), LHS, RHS});
+ else
+ NewVal = DAG.getNode(Opc, DL, ExtTy, {LHS, RHS});
+
+ return DAG.getZExtOrTrunc(NewVal, DL, OpTy);
+}
+
SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
DAGCombinerInfo &DCI) const {
const unsigned Opc = Op.getOpcode();
@@ -14797,16 +14872,19 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
return SDValue(CSrc, 0);
}
-
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
+ case ISD::AND:
+ if (auto Res = combineAnd(SDValue(N, 0), DCI))
+ return Res;
+ break;
case ISD::ADD:
case ISD::SUB:
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
- case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::MUL:
@@ -14910,7 +14988,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::CLAMP:
return performClampCombine(N, DCI);
case ISD::SCALAR_TO_VECTOR: {
- SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
// v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
@@ -16892,8 +16969,8 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
}
bool SITargetLowering::hasAndNot(SDValue Op) const {
- // Return false if the operation is divergent, as AND-NOT optimization
- // requires uniform behavior across threads.
+ // Return false if the operation is divergent, as AND-NOT is a scalar-only
+ // instruction.
if (Op->isDivergent())
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a07ee1641d9ae2..4c90f4da42006b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -147,6 +147,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue combineAnd(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AMDGPU/andornot.ll b/llvm/test/CodeGen/AMDGPU/andornot.ll
new file mode 100644
index 00000000000000..821709847ab8d5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/andornot.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}scalar_and_or_not_i16
+; GCN: s_not_b32
+; GCN-NEXT: s_lshr_b32
+; GCN-NEXT: s_and_b32
+; GCN-NEXT: s_andn2_b32
+define amdgpu_kernel void @scalar_and_or_not_i16(ptr addrspace(1) %out, i16 %x, i16 %y, i16 %z) {
+entry:
+ %not_z = xor i16 %z, -1
+ %or_y_not_z = or i16 %y, %not_z
+ %and_result = and i16 %x, %or_y_not_z
+ store i16 %and_result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}scalar_and_or_not_i32
+; GCN: s_andn2_b32
+; GCN-NEXT: s_andn2_b32
+define amdgpu_kernel void @scalar_and_or_not_i32(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
+entry:
+ %not_z = xor i32 %z, -1
+ %or_y_not_z = or i32 %y, %not_z
+ %and_result = and i32 %x, %or_y_not_z
+ store i32 %and_result, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}scalar_and_or_not_i64
+; GCN: s_andn2_b64
+; GCN-NEXT: s_andn2_b64
+define amdgpu_kernel void @scalar_and_or_not_i64(ptr addrspace(1) %out, i64 %x, i64 %y, i64 %z) {
+entry:
+ %not_z = xor i64 %z, -1
+ %or_y_not_z = or i64 %y, %not_z
+ %and_result = and i64 %x, %or_y_not_z
+ store i64 %and_result, ptr addrspace(1) %out, align 4
+ ret void
+}
More information about the llvm-commits
mailing list