[llvm] 6e3827a - [AMDGPU] Create matchPERM helper from performOrCombine PERM matching code.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 22 08:21:58 PDT 2023
Author: Simon Pilgrim
Date: 2023-09-22T16:21:28+01:00
New Revision: 6e3827af98fa59d5147598972625a5317936c31f
URL: https://github.com/llvm/llvm-project/commit/6e3827af98fa59d5147598972625a5317936c31f
DIFF: https://github.com/llvm/llvm-project/commit/6e3827af98fa59d5147598972625a5317936c31f.diff
LOG: [AMDGPU] Create matchPERM helper from performOrCombine PERM matching code.
Pulled out as an NFC(ish) pre-commit from D159533.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b88e062dc16f787..591775dbf45e396 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11003,6 +11003,89 @@ static bool hasNon16BitAccesses(uint64_t PermMask, SDValue &Op,
return !addresses16Bits(Low16) || !addresses16Bits(Hi16);
}
+static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // Try to rewrite N as one AMDGPUISD::PERM of at most two sources; returns an empty SDValue on no match. Only i32 results are handled.
+ if (VT != MVT::i32)
+ return SDValue();
+
+ // VT is known to be MVT::i32, so we need to provide 4 bytes.
+ SmallVector<ByteProvider<SDValue>, 8> PermNodes;
+ for (int i = 0; i < 4; i++) {
+ // Find the ByteProvider that supplies the ith byte of N's result.
+ std::optional<ByteProvider<SDValue>> P =
+ calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
+ // TODO: support constantZero. For now a missing or constant-zero provider aborts the match.
+ if (!P || P->isConstantZero())
+ return SDValue();
+
+ PermNodes.push_back(*P);
+ }
+ if (PermNodes.size() != 4)
+ return SDValue();
+ // Build the select mask: bits [8*i, 8*i+7] hold the source byte index that feeds result byte i.
+ int FirstSrc = 0;
+ std::optional<int> SecondSrc;
+ uint64_t PermMask = 0x00000000;
+ for (size_t i = 0; i < PermNodes.size(); i++) {
+ auto PermOp = PermNodes[i];
+ // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
+ // by sizeof(Src2) = 4
+ int SrcByteAdjust = 4;
+ // A byte not from the first source must come from the second source once one is chosen; a third distinct source is rejected.
+ if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
+ if (SecondSrc.has_value())
+ if (!PermOp.hasSameSrc(PermNodes[*SecondSrc]))
+ return SDValue();
+
+ // Record (or re-record) the index of a byte taken from the second distinct Src node; its bytes are not offset.
+ SecondSrc = i;
+ assert(!(PermNodes[*SecondSrc].Src->getValueSizeInBits() % 8));
+ SrcByteAdjust = 0;
+ }
+ assert(PermOp.SrcOffset + SrcByteAdjust < 8);
+ assert(!DAG.getDataLayout().isBigEndian());
+ PermMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
+ }
+ // If no second source was found, the first source is used for both PERM operands.
+ SDValue Op = *PermNodes[FirstSrc].Src;
+ SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
+ : *PermNodes[FirstSrc].Src;
+
+ // Check that we are not just extracting the bytes in order from an op
+ if (Op == OtherOp && Op.getValueSizeInBits() == 32) {
+ int Low16 = PermMask & 0xffff;
+ int Hi16 = (PermMask & 0xffff0000) >> 16;
+ // Both operands are Op here, so selectors k and k + 4 name the same byte; either spelling of "bytes in order" counts.
+ bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
+ bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
+
+ // The perm op would really just produce Op. So combine into Op
+ if (WellFormedLow && WellFormedHi)
+ return DAG.getBitcast(MVT::getIntegerVT(32), Op);
+ }
+ // Only form a PERM when hasNon16BitAccesses() reports true for this mask; otherwise fall through and report no match.
+ if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {
+ SDLoc DL(N);
+ assert(Op.getValueType().isByteSized() &&
+ OtherOp.getValueType().isByteSized());
+
+ // If the ultimate src is less than 32 bits, then we will only be
+ // using bytes 0: Op.getValueSizeInBytes() - 1 in the or.
+ // calculateByteProvider would not have returned Op as source if we
+ // used a byte that is outside its ValueType. Thus, we are free to
+ // ANY_EXTEND as the extended bits are don't-cares.
+ Op = DAG.getBitcastedAnyExtOrTrunc(Op, DL, MVT::i32);
+ OtherOp = DAG.getBitcastedAnyExtOrTrunc(OtherOp, DL, MVT::i32);
+
+ return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
+ DAG.getConstant(PermMask, DL, MVT::i32));
+ }
+
+ return SDValue();
+}
+
SDValue SITargetLowering::performOrCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -11116,80 +11199,8 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
}
}
if (LHSMask == ~0u || RHSMask == ~0u) {
- SmallVector<ByteProvider<SDValue>, 8> PermNodes;
-
- // VT is known to be MVT::i32, so we need to provide 4 bytes.
- assert(VT == MVT::i32);
- for (int i = 0; i < 4; i++) {
- // Find the ByteProvider that provides the ith byte of the result of OR
- std::optional<ByteProvider<SDValue>> P =
- calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
- // TODO support constantZero
- if (!P || P->isConstantZero())
- return SDValue();
-
- PermNodes.push_back(*P);
- }
- if (PermNodes.size() != 4)
- return SDValue();
-
- int FirstSrc = 0;
- std::optional<int> SecondSrc;
- uint64_t PermMask = 0x00000000;
- for (size_t i = 0; i < PermNodes.size(); i++) {
- auto PermOp = PermNodes[i];
- // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
- // by sizeof(Src2) = 4
- int SrcByteAdjust = 4;
-
- if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
- if (SecondSrc.has_value())
- if (!PermOp.hasSameSrc(PermNodes[*SecondSrc]))
- return SDValue();
-
- // Set the index of the second distinct Src node
- SecondSrc = i;
- assert(!(PermNodes[*SecondSrc].Src->getValueSizeInBits() % 8));
- SrcByteAdjust = 0;
- }
- assert(PermOp.SrcOffset + SrcByteAdjust < 8);
- assert(!DAG.getDataLayout().isBigEndian());
- PermMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
- }
-
- SDValue Op = *PermNodes[FirstSrc].Src;
- SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
- : *PermNodes[FirstSrc].Src;
-
- // Check that we are not just extracting the bytes in order from an op
- if (Op == OtherOp && Op.getValueSizeInBits() == 32) {
- int Low16 = PermMask & 0xffff;
- int Hi16 = (PermMask & 0xffff0000) >> 16;
-
- bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
- bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
-
- // The perm op would really just produce Op. So combine into Op
- if (WellFormedLow && WellFormedHi)
- return DAG.getBitcast(MVT::getIntegerVT(32), Op);
- }
-
- if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {
- SDLoc DL(N);
- assert(Op.getValueType().isByteSized() &&
- OtherOp.getValueType().isByteSized());
-
- // If the ultimate src is less than 32 bits, then we will only be
- // using bytes 0: Op.getValueSizeInBytes() - 1 in the or.
- // CalculateByteProvider would not have returned Op as source if we
- // used a byte that is outside its ValueType. Thus, we are free to
- // ANY_EXTEND as the extended bits are dont-cares.
- Op = DAG.getBitcastedAnyExtOrTrunc(Op, DL, MVT::i32);
- OtherOp = DAG.getBitcastedAnyExtOrTrunc(OtherOp, DL, MVT::i32);
-
- return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
- DAG.getConstant(PermMask, DL, MVT::i32));
- }
+ if (SDValue Perm = matchPERM(N, DCI))
+ return Perm;
}
}
More information about the llvm-commits
mailing list