[llvm] 6e3827a - [AMDGPU] Create matchPERM helper from performOrCombine PERM matching code.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 22 08:21:58 PDT 2023


Author: Simon Pilgrim
Date: 2023-09-22T16:21:28+01:00
New Revision: 6e3827af98fa59d5147598972625a5317936c31f

URL: https://github.com/llvm/llvm-project/commit/6e3827af98fa59d5147598972625a5317936c31f
DIFF: https://github.com/llvm/llvm-project/commit/6e3827af98fa59d5147598972625a5317936c31f.diff

LOG: [AMDGPU] Create matchPERM helper from performOrCombine PERM matching code.

Pulled out as NFC(ish) pre-commit from D159533

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b88e062dc16f787..591775dbf45e396 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11003,6 +11003,89 @@ static bool hasNon16BitAccesses(uint64_t PermMask, SDValue &Op,
   return !addresses16Bits(Low16) || !addresses16Bits(Hi16);
 }
 
+static SDValue matchPERM(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+
+  if (VT != MVT::i32)
+    return SDValue();
+
+  // VT is known to be MVT::i32, so we need to provide 4 bytes.
+  SmallVector<ByteProvider<SDValue>, 8> PermNodes;
+  for (int i = 0; i < 4; i++) {
+    // Find the ByteProvider that provides the ith byte of the result of OR
+    std::optional<ByteProvider<SDValue>> P =
+        calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
+    // TODO support constantZero
+    if (!P || P->isConstantZero())
+      return SDValue();
+
+    PermNodes.push_back(*P);
+  }
+  if (PermNodes.size() != 4)
+    return SDValue();
+
+  int FirstSrc = 0;
+  std::optional<int> SecondSrc;
+  uint64_t PermMask = 0x00000000;
+  for (size_t i = 0; i < PermNodes.size(); i++) {
+    auto PermOp = PermNodes[i];
+    // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
+    // by sizeof(Src2) = 4
+    int SrcByteAdjust = 4;
+
+    if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
+      if (SecondSrc.has_value())
+        if (!PermOp.hasSameSrc(PermNodes[*SecondSrc]))
+          return SDValue();
+
+      // Set the index of the second distinct Src node
+      SecondSrc = i;
+      assert(!(PermNodes[*SecondSrc].Src->getValueSizeInBits() % 8));
+      SrcByteAdjust = 0;
+    }
+    assert(PermOp.SrcOffset + SrcByteAdjust < 8);
+    assert(!DAG.getDataLayout().isBigEndian());
+    PermMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
+  }
+
+  SDValue Op = *PermNodes[FirstSrc].Src;
+  SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
+                                          : *PermNodes[FirstSrc].Src;
+
+  // Check that we are not just extracting the bytes in order from an op
+  if (Op == OtherOp && Op.getValueSizeInBits() == 32) {
+    int Low16 = PermMask & 0xffff;
+    int Hi16 = (PermMask & 0xffff0000) >> 16;
+
+    bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
+    bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
+
+    // The perm op would really just produce Op. So combine into Op
+    if (WellFormedLow && WellFormedHi)
+      return DAG.getBitcast(MVT::getIntegerVT(32), Op);
+  }
+
+  if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {
+    SDLoc DL(N);
+    assert(Op.getValueType().isByteSized() &&
+           OtherOp.getValueType().isByteSized());
+
+    // If the ultimate src is less than 32 bits, then we will only be
+    // using bytes 0: Op.getValueSizeInBytes() - 1 in the or.
+    // CalculateByteProvider would not have returned Op as source if we
+    // used a byte that is outside its ValueType. Thus, we are free to
+    // ANY_EXTEND as the extended bits are dont-cares.
+    Op = DAG.getBitcastedAnyExtOrTrunc(Op, DL, MVT::i32);
+    OtherOp = DAG.getBitcastedAnyExtOrTrunc(OtherOp, DL, MVT::i32);
+
+    return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
+                       DAG.getConstant(PermMask, DL, MVT::i32));
+  }
+
+  return SDValue();
+}
+
 SDValue SITargetLowering::performOrCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -11116,80 +11199,8 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
       }
     }
     if (LHSMask == ~0u || RHSMask == ~0u) {
-      SmallVector<ByteProvider<SDValue>, 8> PermNodes;
-
-      // VT is known to be MVT::i32, so we need to provide 4 bytes.
-      assert(VT == MVT::i32);
-      for (int i = 0; i < 4; i++) {
-        // Find the ByteProvider that provides the ith byte of the result of OR
-        std::optional<ByteProvider<SDValue>> P =
-            calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
-        // TODO support constantZero
-        if (!P || P->isConstantZero())
-          return SDValue();
-
-        PermNodes.push_back(*P);
-      }
-      if (PermNodes.size() != 4)
-        return SDValue();
-
-      int FirstSrc = 0;
-      std::optional<int> SecondSrc;
-      uint64_t PermMask = 0x00000000;
-      for (size_t i = 0; i < PermNodes.size(); i++) {
-        auto PermOp = PermNodes[i];
-        // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
-        // by sizeof(Src2) = 4
-        int SrcByteAdjust = 4;
-
-        if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
-          if (SecondSrc.has_value())
-            if (!PermOp.hasSameSrc(PermNodes[*SecondSrc]))
-              return SDValue();
-
-          // Set the index of the second distinct Src node
-          SecondSrc = i;
-          assert(!(PermNodes[*SecondSrc].Src->getValueSizeInBits() % 8));
-          SrcByteAdjust = 0;
-        }
-        assert(PermOp.SrcOffset + SrcByteAdjust < 8);
-        assert(!DAG.getDataLayout().isBigEndian());
-        PermMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
-      }
-
-      SDValue Op = *PermNodes[FirstSrc].Src;
-      SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
-                                              : *PermNodes[FirstSrc].Src;
-
-      // Check that we are not just extracting the bytes in order from an op
-      if (Op == OtherOp && Op.getValueSizeInBits() == 32) {
-        int Low16 = PermMask & 0xffff;
-        int Hi16 = (PermMask & 0xffff0000) >> 16;
-
-        bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
-        bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
-
-        // The perm op would really just produce Op. So combine into Op
-        if (WellFormedLow && WellFormedHi)
-          return DAG.getBitcast(MVT::getIntegerVT(32), Op);
-      }
-
-      if (hasNon16BitAccesses(PermMask, Op, OtherOp)) {
-        SDLoc DL(N);
-        assert(Op.getValueType().isByteSized() &&
-               OtherOp.getValueType().isByteSized());
-
-        // If the ultimate src is less than 32 bits, then we will only be
-        // using bytes 0: Op.getValueSizeInBytes() - 1 in the or.
-        // CalculateByteProvider would not have returned Op as source if we
-        // used a byte that is outside its ValueType. Thus, we are free to
-        // ANY_EXTEND as the extended bits are dont-cares.
-        Op = DAG.getBitcastedAnyExtOrTrunc(Op, DL, MVT::i32);
-        OtherOp = DAG.getBitcastedAnyExtOrTrunc(OtherOp, DL, MVT::i32);
-
-        return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
-                           DAG.getConstant(PermMask, DL, MVT::i32));
-      }
+      if (SDValue Perm = matchPERM(N, DCI))
+        return Perm;
     }
   }
 


        


More information about the llvm-commits mailing list