[llvm] a9d9616 - [RISCV][NFC] Share interleave mask checking logic

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 14 04:02:59 PDT 2023


Author: Luke Lau
Date: 2023-03-14T11:02:52Z
New Revision: a9d9616c0de3f07654ee139bead48b8d78f44e1f

URL: https://github.com/llvm/llvm-project/commit/a9d9616c0de3f07654ee139bead48b8d78f44e1f
DIFF: https://github.com/llvm/llvm-project/commit/a9d9616c0de3f07654ee139bead48b8d78f44e1f.diff

LOG: [RISCV][NFC] Share interleave mask checking logic

This adds two new methods to ShuffleVectorInst, isInterleave and
isInterleaveMask, so that the logic to check if a shuffle mask is an
interleave can be shared across the TTI, codegen and the interleaved
access pass.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D145971

Added: 
    

Modified: 
    llvm/include/llvm/IR/Instructions.h
    llvm/lib/CodeGen/InterleavedAccessPass.cpp
    llvm/lib/IR/Instructions.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
    llvm/unittests/IR/ShuffleVectorInstTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index fb6faec3ad848..ee38a118f182d 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -2430,6 +2430,33 @@ class ShuffleVectorInst : public Instruction {
     }
   }
 
+  /// Return true if this shuffle interleaves its two input vectors together.
+  bool isInterleave(unsigned Factor);
+
+  /// Return true if the mask interleaves one or more input vectors together.
+  ///
+  /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
+  /// E.g. For a Factor of 2 (LaneLen=4):
+  ///   <0, 4, 1, 5, 2, 6, 3, 7>
+  /// E.g. For a Factor of 3 (LaneLen=4):
+  ///   <4, 0, 9, 5, 1, 10, 6, 2, 11, 7, 3, 12>
+  /// E.g. For a Factor of 4 (LaneLen=2):
+  ///   <0, 2, 6, 4, 1, 3, 7, 5>
+  ///
+  /// NumInputElts is the total number of elements in the input vectors.
+  ///
+  /// StartIndexes are the first indexes of each vector being interleaved,
+  /// with any undef start index inferred from the rest of its lane.
+  /// E.g. <4, -1, 2, 5, 1, 3> (Factor=3): StartIndexes=<4, 0, 2>
+  static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
+                               unsigned NumInputElts,
+                               SmallVectorImpl<unsigned> &StartIndexes);
+  static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
+                               unsigned NumInputElts) {
+    SmallVector<unsigned, 8> StartIndexes;
+    return isInterleaveMask(Mask, Factor, NumInputElts, StartIndexes);
+  }
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::ShuffleVector;

diff  --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 0582378be4cd8..95c4fafba8640 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -202,86 +202,15 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
 /// The particular case of an RE-interleave mask is:
 /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
 /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
-static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
-                               unsigned MaxFactor, unsigned OpNumElts) {
-  unsigned NumElts = Mask.size();
+static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
+                               unsigned MaxFactor) {
+  unsigned NumElts = SVI->getShuffleMask().size();
   if (NumElts < 4)
     return false;
 
   // Check potential Factors.
   for (Factor = 2; Factor <= MaxFactor; Factor++) {
-    if (NumElts % Factor)
-      continue;
-
-    unsigned LaneLen = NumElts / Factor;
-    if (!isPowerOf2_32(LaneLen))
-      continue;
-
-    // Check whether each element matches the general interleaved rule.
-    // Ignore undef elements, as long as the defined elements match the rule.
-    // Outer loop processes all factors (x, y, z in the above example)
-    unsigned I = 0, J;
-    for (; I < Factor; I++) {
-      unsigned SavedLaneValue;
-      unsigned SavedNoUndefs = 0;
-
-      // Inner loop processes consecutive accesses (x, x+1... in the example)
-      for (J = 0; J < LaneLen - 1; J++) {
-        // Lane computes x's position in the Mask
-        unsigned Lane = J * Factor + I;
-        unsigned NextLane = Lane + Factor;
-        int LaneValue = Mask[Lane];
-        int NextLaneValue = Mask[NextLane];
-
-        // If both are defined, values must be sequential
-        if (LaneValue >= 0 && NextLaneValue >= 0 &&
-            LaneValue + 1 != NextLaneValue)
-          break;
-
-        // If the next value is undef, save the current one as reference
-        if (LaneValue >= 0 && NextLaneValue < 0) {
-          SavedLaneValue = LaneValue;
-          SavedNoUndefs = 1;
-        }
-
-        // Undefs are allowed, but defined elements must still be consecutive:
-        // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
-        // Verify this by storing the last non-undef followed by an undef
-        // Check that following non-undef masks are incremented with the
-        // corresponding distance.
-        if (SavedNoUndefs > 0 && LaneValue < 0) {
-          SavedNoUndefs++;
-          if (NextLaneValue >= 0 &&
-              SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
-            break;
-        }
-      }
-
-      if (J < LaneLen - 1)
-        break;
-
-      int StartMask = 0;
-      if (Mask[I] >= 0) {
-        // Check that the start of the I range (J=0) is greater than 0
-        StartMask = Mask[I];
-      } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
-        // StartMask defined by the last value in lane
-        StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
-      } else if (SavedNoUndefs > 0) {
-        // StartMask defined by some non-zero value in the j loop
-        StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
-      }
-      // else StartMask remains set to 0, i.e. all elements are undefs
-
-      if (StartMask < 0)
-        break;
-      // We must stay within the vectors; This case can happen with undefs.
-      if (StartMask + LaneLen > OpNumElts*2)
-        break;
-    }
-
-    // Found an interleaved mask of current factor.
-    if (I == Factor)
+    if (SVI->isInterleave(Factor))
       return true;
   }
 
@@ -500,9 +429,7 @@ bool InterleavedAccess::lowerInterleavedStore(
 
   // Check if the shufflevector is RE-interleave shuffle.
   unsigned Factor;
-  unsigned OpNumElts =
-      cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
-  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
+  if (!isReInterleaveMask(SVI, Factor, MaxFactor))
     return false;
 
   LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");

diff  --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 5e25c31a83074..a1cc580998ee4 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2728,6 +2728,98 @@ bool ShuffleVectorInst::isOneUseSingleSourceMask(int VF) const {
   return isOneUseSingleSourceMask(ShuffleMask, VF);
 }
 
+bool ShuffleVectorInst::isInterleave(unsigned Factor) {
+  FixedVectorType *OpTy = dyn_cast<FixedVectorType>(getOperand(0)->getType());
+  // shuffle_vector can only interleave fixed length vectors - for scalable
+  // vectors, see the @llvm.experimental.vector.interleave2 intrinsic
+  if (!OpTy)
+    return false;
+  unsigned OpNumElts = OpTy->getNumElements();
+
+  return isInterleaveMask(ShuffleMask, Factor, OpNumElts * 2);
+}
+
+bool ShuffleVectorInst::isInterleaveMask(
+    ArrayRef<int> Mask, unsigned Factor, unsigned NumInputElts,
+    SmallVectorImpl<unsigned> &StartIndexes) {
+  unsigned NumElts = Mask.size();
+  if (NumElts % Factor)
+    return false;
+
+  unsigned LaneLen = NumElts / Factor;
+  if (!isPowerOf2_32(LaneLen))
+    return false;
+
+  StartIndexes.resize(Factor);
+
+  // Check whether each element matches the general interleaved rule.
+  // Ignore undef elements, as long as the defined elements match the rule.
+  // Outer loop processes each of the Factor interleaved lanes.
+  unsigned I = 0, J;
+  for (; I < Factor; I++) {
+    unsigned SavedLaneValue;
+    unsigned SavedNoUndefs = 0;
+
+    // Inner loop checks consecutive elements of lane I (x, x+1, ...)
+    for (J = 0; J < LaneLen - 1; J++) {
+      // Lane computes x's position in the Mask
+      unsigned Lane = J * Factor + I;
+      unsigned NextLane = Lane + Factor;
+      int LaneValue = Mask[Lane];
+      int NextLaneValue = Mask[NextLane];
+
+      // If both are defined, values must be sequential
+      if (LaneValue >= 0 && NextLaneValue >= 0 &&
+          LaneValue + 1 != NextLaneValue)
+        break;
+
+      // If the next value is undef, save the current one as reference
+      if (LaneValue >= 0 && NextLaneValue < 0) {
+        SavedLaneValue = LaneValue;
+        SavedNoUndefs = 1;
+      }
+
+      // Undefs are allowed, but defined elements must still be consecutive:
+      // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
+      // Verify this by storing the last non-undef followed by an undef
+      // Check that following non-undef masks are incremented with the
+      // corresponding distance.
+      if (SavedNoUndefs > 0 && LaneValue < 0) {
+        SavedNoUndefs++;
+        if (NextLaneValue >= 0 &&
+            SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
+          break;
+      }
+    }
+
+    if (J < LaneLen - 1)
+      return false;
+
+    int StartMask = 0;
+    if (Mask[I] >= 0) {
+      // StartMask defined by the first value in lane
+      StartMask = Mask[I];
+    } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
+      // StartMask defined by the last value in lane
+      StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
+    } else if (SavedNoUndefs > 0) {
+      // StartMask defined by some non-undef value saved in the J loop
+      StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
+    }
+    // else StartMask remains set to 0, i.e. all elements are undefs
+
+    if (StartMask < 0)
+      return false;
+    // We must stay within the vectors; This case can happen with undefs.
+    if (StartMask + LaneLen > NumInputElts)
+      return false;
+
+    StartIndexes[I] = StartMask;
+  }
+
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 //                             InsertValueInst Class
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c8a9d02d7062d..8f68dab55284c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -31,6 +31,7 @@
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
@@ -3062,46 +3063,19 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
     return false;
 
   int Size = Mask.size();
-  int HalfSize = Size / 2;
   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
 
-  int Srcs[] = {-1, -1};
-  for (int i = 0; i != Size; ++i) {
-    // Ignore undef elements.
-    if (Mask[i] < 0)
-      continue;
-
-    // Is this an even or odd element.
-    int Pol = i % 2;
+  SmallVector<unsigned, 2> StartIndexes;
+  if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
+    return false;
 
-    // Ensure we consistently use the same half source for this polarity.
-    int Src = alignDown(Mask[i], HalfSize);
-    if (Srcs[Pol] < 0)
-      Srcs[Pol] = Src;
-    if (Srcs[Pol] != Src)
-      return false;
-
-    // Make sure the element within the source is appropriate for this element
-    // in the destination.
-    int Elt = Mask[i] % HalfSize;
-    if (Elt != i / 2)
-      return false;
-  }
+  EvenSrc = StartIndexes[0] % 2 ? StartIndexes[1] : StartIndexes[0];
+  OddSrc = StartIndexes[0] % 2 ? StartIndexes[0] : StartIndexes[1];
 
   // One source should be low half of first vector.
-  if (Srcs[0] != 0 && Srcs[1] != 0)
+  if (EvenSrc != 0 && OddSrc != 0)
     return false;
 
-  // Other source should be the upper half of the first source or the lower
-  // half of the second source.
-  // FIXME: This is only a heuristic to avoid regressions.
-  if (Srcs[0] != HalfSize && Srcs[0] != Size && Srcs[1] != HalfSize &&
-      Srcs[1] != Size)
-    return false;
-
-  EvenSrc = Srcs[0];
-  OddSrc = Srcs[1];
-
   return true;
 }
 

diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 1e78ef6e8f7a2..07ca7b9af6117 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -10,10 +10,10 @@
 #include "MCTargetDesc/RISCVMatInt.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Instructions.h"
 #include <cmath>
 #include <optional>
 using namespace llvm;
@@ -261,16 +261,17 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
         MVT EltTp = LT.second.getVectorElementType();
         // If the size of the element is < ELEN then shuffles of interleaves and
-        // deinterleaves of 2 vectors can be lowered into the following sequences
+        // deinterleaves of 2 vectors can be lowered into the following
+        // sequences
         if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
           auto InterleaveMask = createInterleaveMask(Mask.size() / 2, 2);
           // Example sequence:
-          //   vsetivli	zero, 4, e8, mf4, ta, ma (ignored)
+          //   vsetivli		zero, 4, e8, mf4, ta, ma (ignored)
           //   vwaddu.vv	v10, v8, v9
           //   li		a0, -1                   (ignored)
           //   vwmaccu.vx	v10, a0, v9
-          if (equal(InterleaveMask, Mask))
-            return 2 * LT.first * getLMULCost(LT.second);
+	  if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size() * 2))
+	    return 2 * LT.first * getLMULCost(LT.second);
 
           if (Mask[0] == 0 || Mask[0] == 1) {
             auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());

diff  --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
index aed68b52fe2cc..97480d8e813c6 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
@@ -1,6 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV32
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV64
+
+; The mask here interleaves (%v1, %v0), not (%v0, %v1): it should still be cheap.
+define <4 x i8> @interleave2_v2i8(<2 x i8> %v0, <2 x i8> %v1) {
+; CHECK-LABEL: 'interleave2_v2i8'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %concat = shufflevector <2 x i8> %v0, <2 x i8> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = shufflevector <4 x i8> %concat, <4 x i8> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i8> %res
+;
+  %concat = shufflevector <2 x i8> %v0, <2 x i8> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %res = shufflevector <4 x i8> %concat, <4 x i8> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+  ret <4 x i8> %res
+}
+
 define <8 x i8> @interleave2_v8i8(<4 x i8> %v0, <4 x i8> %v1) {
 ; CHECK-LABEL: 'interleave2_v8i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %concat = shufflevector <4 x i8> %v0, <4 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

diff  --git a/llvm/unittests/IR/ShuffleVectorInstTest.cpp b/llvm/unittests/IR/ShuffleVectorInstTest.cpp
index cf4f91cd2a84d..ba1b9807cffb4 100644
--- a/llvm/unittests/IR/ShuffleVectorInstTest.cpp
+++ b/llvm/unittests/IR/ShuffleVectorInstTest.cpp
@@ -116,4 +116,33 @@ TEST(ShuffleVectorInst, isOneUseSingleSourceMask) {
       ShuffleVectorInst::isOneUseSingleSourceMask({0, 1, 2, 3, 3, 3, 1, 0}, 4));
 }
 
+TEST(ShuffleVectorInst, isInterleaveMask) {
+  SmallVector<unsigned> StartIndexes;
+  ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({0, 4, 1, 5, 2, 6, 3, 7}, 2,
+                                                  8, StartIndexes));
+  ASSERT_EQ(StartIndexes, SmallVector<unsigned>({0, 4}));
+
+  ASSERT_FALSE(
+      ShuffleVectorInst::isInterleaveMask({0, 4, 1, 6, 2, 6, 3, 7}, 2, 8));
+
+  ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, 0, 5, 1, 6, 2, 7, 3}, 2,
+                                                  8, StartIndexes));
+  ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 0}));
+
+  ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, 0, -1, 1, -1, 2, 7, 3}, 2,
+                                                  8, StartIndexes));
+  ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 0}));
+
+  ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({0, 2, 4, 1, 3, 5}, 3, 6,
+                                                  StartIndexes));
+  ASSERT_EQ(StartIndexes, SmallVector<unsigned>({0, 2, 4}));
+
+  ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, -1, 0, 5, 3, 1}, 3, 6,
+                                                  StartIndexes));
+  ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 2, 0}));
+
+  ASSERT_FALSE(
+      ShuffleVectorInst::isInterleaveMask({8, 2, 12, 4, 9, 3, 13, 5}, 4, 8));
+}
+
 } // end anonymous namespace


        


More information about the llvm-commits mailing list