[llvm] a9d9616 - [RISCV][NFC] Share interleave mask checking logic
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 14 04:02:59 PDT 2023
Author: Luke Lau
Date: 2023-03-14T11:02:52Z
New Revision: a9d9616c0de3f07654ee139bead48b8d78f44e1f
URL: https://github.com/llvm/llvm-project/commit/a9d9616c0de3f07654ee139bead48b8d78f44e1f
DIFF: https://github.com/llvm/llvm-project/commit/a9d9616c0de3f07654ee139bead48b8d78f44e1f.diff
LOG: [RISCV][NFC] Share interleave mask checking logic
This adds two new methods to ShuffleVectorInst, isInterleave and
isInterleaveMask, so that the logic to check if a shuffle mask is an
interleave can be shared across the TTI, codegen and the interleaved
access pass.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D145971
Added:
Modified:
llvm/include/llvm/IR/Instructions.h
llvm/lib/CodeGen/InterleavedAccessPass.cpp
llvm/lib/IR/Instructions.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
llvm/unittests/IR/ShuffleVectorInstTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index fb6faec3ad848..ee38a118f182d 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -2430,6 +2430,33 @@ class ShuffleVectorInst : public Instruction {
}
}
+ /// Return true if this shuffle interleaves its two input vectors together.
+ bool isInterleave(unsigned Factor);
+
+ /// Return true if the mask interleaves one or more input vectors together.
+ ///
+ /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
+ /// E.g. For a Factor of 2 (LaneLen=4):
+ /// <0, 4, 1, 5, 2, 6, 3, 7>
+ /// E.g. For a Factor of 3 (LaneLen=4):
+ /// <4, 0, 9, 5, 1, 10, 6, 2, 11, 7, 3, 12>
+ /// E.g. For a Factor of 4 (LaneLen=2):
+ /// <0, 2, 6, 4, 1, 3, 7, 5>
+ ///
+ /// NumInputElts is the total number of elements in the input vectors.
+ ///
+ /// StartIndexes are the first indexes of each vector being interleaved,
+ /// with undef indexes inferred from the lane's defined elements (or 0).
+ /// E.g. <4, -1, 2, 5, 1, 3> (Factor=3): StartIndexes=<4, 0, 2>
+ static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
+ unsigned NumInputElts,
+ SmallVectorImpl<unsigned> &StartIndexes);
+ static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
+ unsigned NumInputElts) {
+ SmallVector<unsigned, 8> StartIndexes;
+ return isInterleaveMask(Mask, Factor, NumInputElts, StartIndexes);
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ShuffleVector;
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 0582378be4cd8..95c4fafba8640 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -202,86 +202,15 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
/// The particular case of an RE-interleave mask is:
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
-static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned MaxFactor, unsigned OpNumElts) {
- unsigned NumElts = Mask.size();
+static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
+ unsigned MaxFactor) {
+ unsigned NumElts = SVI->getShuffleMask().size();
if (NumElts < 4)
return false;
// Check potential Factors.
for (Factor = 2; Factor <= MaxFactor; Factor++) {
- if (NumElts % Factor)
- continue;
-
- unsigned LaneLen = NumElts / Factor;
- if (!isPowerOf2_32(LaneLen))
- continue;
-
- // Check whether each element matches the general interleaved rule.
- // Ignore undef elements, as long as the defined elements match the rule.
- // Outer loop processes all factors (x, y, z in the above example)
- unsigned I = 0, J;
- for (; I < Factor; I++) {
- unsigned SavedLaneValue;
- unsigned SavedNoUndefs = 0;
-
- // Inner loop processes consecutive accesses (x, x+1... in the example)
- for (J = 0; J < LaneLen - 1; J++) {
- // Lane computes x's position in the Mask
- unsigned Lane = J * Factor + I;
- unsigned NextLane = Lane + Factor;
- int LaneValue = Mask[Lane];
- int NextLaneValue = Mask[NextLane];
-
- // If both are defined, values must be sequential
- if (LaneValue >= 0 && NextLaneValue >= 0 &&
- LaneValue + 1 != NextLaneValue)
- break;
-
- // If the next value is undef, save the current one as reference
- if (LaneValue >= 0 && NextLaneValue < 0) {
- SavedLaneValue = LaneValue;
- SavedNoUndefs = 1;
- }
-
- // Undefs are allowed, but defined elements must still be consecutive:
- // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
- // Verify this by storing the last non-undef followed by an undef
- // Check that following non-undef masks are incremented with the
- // corresponding distance.
- if (SavedNoUndefs > 0 && LaneValue < 0) {
- SavedNoUndefs++;
- if (NextLaneValue >= 0 &&
- SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
- break;
- }
- }
-
- if (J < LaneLen - 1)
- break;
-
- int StartMask = 0;
- if (Mask[I] >= 0) {
- // Check that the start of the I range (J=0) is greater than 0
- StartMask = Mask[I];
- } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
- // StartMask defined by the last value in lane
- StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
- } else if (SavedNoUndefs > 0) {
- // StartMask defined by some non-zero value in the j loop
- StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
- }
- // else StartMask remains set to 0, i.e. all elements are undefs
-
- if (StartMask < 0)
- break;
- // We must stay within the vectors; This case can happen with undefs.
- if (StartMask + LaneLen > OpNumElts*2)
- break;
- }
-
- // Found an interleaved mask of current factor.
- if (I == Factor)
+ if (SVI->isInterleave(Factor))
return true;
}
@@ -500,9 +429,7 @@ bool InterleavedAccess::lowerInterleavedStore(
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- unsigned OpNumElts =
- cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
- if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
+ if (!isReInterleaveMask(SVI, Factor, MaxFactor))
return false;
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 5e25c31a83074..a1cc580998ee4 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2728,6 +2728,98 @@ bool ShuffleVectorInst::isOneUseSingleSourceMask(int VF) const {
return isOneUseSingleSourceMask(ShuffleMask, VF);
}
+bool ShuffleVectorInst::isInterleave(unsigned Factor) {
+ FixedVectorType *OpTy = dyn_cast<FixedVectorType>(getOperand(0)->getType());
+ // shuffle_vector can only interleave fixed length vectors - for scalable
+ // vectors, see the @llvm.experimental.vector.interleave2 intrinsic
+ if (!OpTy)
+ return false;
+ unsigned OpNumElts = OpTy->getNumElements();
+
+ return isInterleaveMask(ShuffleMask, Factor, OpNumElts * 2);
+}
+
+bool ShuffleVectorInst::isInterleaveMask(
+ ArrayRef<int> Mask, unsigned Factor, unsigned NumInputElts,
+ SmallVectorImpl<unsigned> &StartIndexes) {
+ unsigned NumElts = Mask.size();
+ if (NumElts % Factor)
+ return false;
+
+ unsigned LaneLen = NumElts / Factor;
+ if (!isPowerOf2_32(LaneLen))
+ return false;
+
+ StartIndexes.resize(Factor);
+
+ // Check whether each element matches the general interleaved rule.
+ // Ignore undef elements, as long as the defined elements match the rule.
+ // Outer loop processes all factors (x, y, z in the above example)
+ unsigned I = 0, J;
+ for (; I < Factor; I++) {
+ unsigned SavedLaneValue;
+ unsigned SavedNoUndefs = 0;
+
+ // Inner loop processes consecutive accesses (x, x+1... in the example)
+ for (J = 0; J < LaneLen - 1; J++) {
+ // Lane computes x's position in the Mask
+ unsigned Lane = J * Factor + I;
+ unsigned NextLane = Lane + Factor;
+ int LaneValue = Mask[Lane];
+ int NextLaneValue = Mask[NextLane];
+
+ // If both are defined, values must be sequential
+ if (LaneValue >= 0 && NextLaneValue >= 0 &&
+ LaneValue + 1 != NextLaneValue)
+ break;
+
+ // If the next value is undef, save the current one as reference
+ if (LaneValue >= 0 && NextLaneValue < 0) {
+ SavedLaneValue = LaneValue;
+ SavedNoUndefs = 1;
+ }
+
+ // Undefs are allowed, but defined elements must still be consecutive:
+ // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
+ // Verify this by storing the last non-undef followed by an undef
+ // Check that following non-undef masks are incremented with the
+ // corresponding distance.
+ if (SavedNoUndefs > 0 && LaneValue < 0) {
+ SavedNoUndefs++;
+ if (NextLaneValue >= 0 &&
+ SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
+ break;
+ }
+ }
+
+ if (J < LaneLen - 1)
+ return false;
+
+ int StartMask = 0;
+ if (Mask[I] >= 0) {
+ // The first element of lane I (J=0) is defined, so use it as the start
+ StartMask = Mask[I];
+ } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
+ // StartMask defined by the last value in lane
+ StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
+ } else if (SavedNoUndefs > 0) {
+ // StartMask defined by some non-zero value in the j loop
+ StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
+ }
+ // else StartMask remains set to 0, i.e. all elements are undefs
+
+ if (StartMask < 0)
+ return false;
+ // We must stay within the vectors; This case can happen with undefs.
+ if (StartMask + LaneLen > NumInputElts)
+ return false;
+
+ StartIndexes[I] = StartMask;
+ }
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c8a9d02d7062d..8f68dab55284c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
@@ -3062,46 +3063,19 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
return false;
int Size = Mask.size();
- int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
- int Srcs[] = {-1, -1};
- for (int i = 0; i != Size; ++i) {
- // Ignore undef elements.
- if (Mask[i] < 0)
- continue;
-
- // Is this an even or odd element.
- int Pol = i % 2;
+ SmallVector<unsigned, 2> StartIndexes;
+ if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
+ return false;
- // Ensure we consistently use the same half source for this polarity.
- int Src = alignDown(Mask[i], HalfSize);
- if (Srcs[Pol] < 0)
- Srcs[Pol] = Src;
- if (Srcs[Pol] != Src)
- return false;
-
- // Make sure the element within the source is appropriate for this element
- // in the destination.
- int Elt = Mask[i] % HalfSize;
- if (Elt != i / 2)
- return false;
- }
+ EvenSrc = StartIndexes[0] % 2 ? StartIndexes[1] : StartIndexes[0];
+ OddSrc = StartIndexes[0] % 2 ? StartIndexes[0] : StartIndexes[1];
// One source should be low half of first vector.
- if (Srcs[0] != 0 && Srcs[1] != 0)
+ if (EvenSrc != 0 && OddSrc != 0)
return false;
- // Other source should be the upper half of the first source or the lower
- // half of the second source.
- // FIXME: This is only a heuristic to avoid regressions.
- if (Srcs[0] != HalfSize && Srcs[0] != Size && Srcs[1] != HalfSize &&
- Srcs[1] != Size)
- return false;
-
- EvenSrc = Srcs[0];
- OddSrc = Srcs[1];
-
return true;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 1e78ef6e8f7a2..07ca7b9af6117 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -10,10 +10,10 @@
#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Instructions.h"
#include <cmath>
#include <optional>
using namespace llvm;
@@ -261,16 +261,17 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
MVT EltTp = LT.second.getVectorElementType();
// If the size of the element is < ELEN then shuffles of interleaves and
- // deinterleaves of 2 vectors can be lowered into the following sequences
+ // deinterleaves of 2 vectors can be lowered into the following
+ // sequences
if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
auto InterleaveMask = createInterleaveMask(Mask.size() / 2, 2);
// Example sequence:
- // vsetivli zero, 4, e8, mf4, ta, ma (ignored)
+ // vsetivli zero, 4, e8, mf4, ta, ma (ignored)
// vwaddu.vv v10, v8, v9
// li a0, -1 (ignored)
// vwmaccu.vx v10, a0, v9
- if (equal(InterleaveMask, Mask))
- return 2 * LT.first * getLMULCost(LT.second);
+ if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size() * 2))
+ return 2 * LT.first * getLMULCost(LT.second);
if (Mask[0] == 0 || Mask[0] == 1) {
auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
index aed68b52fe2cc..97480d8e813c6 100644
--- a/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll
@@ -1,6 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV32
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v | FileCheck %s -check-prefixes=CHECK,RV64
+
+; The mask here interleaves (%v1, %v0), not (%v0, %v1): it should still be cheap.
+define <4 x i8> @interleave2_v2i8(<2 x i8> %v0, <2 x i8> %v1) {
+; CHECK-LABEL: 'interleave2_v2i8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %concat = shufflevector <2 x i8> %v0, <2 x i8> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = shufflevector <4 x i8> %concat, <4 x i8> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i8> %res
+;
+ %concat = shufflevector <2 x i8> %v0, <2 x i8> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = shufflevector <4 x i8> %concat, <4 x i8> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+ ret <4 x i8> %res
+}
+
define <8 x i8> @interleave2_v8i8(<4 x i8> %v0, <4 x i8> %v1) {
; CHECK-LABEL: 'interleave2_v8i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %concat = shufflevector <4 x i8> %v0, <4 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/llvm/unittests/IR/ShuffleVectorInstTest.cpp b/llvm/unittests/IR/ShuffleVectorInstTest.cpp
index cf4f91cd2a84d..ba1b9807cffb4 100644
--- a/llvm/unittests/IR/ShuffleVectorInstTest.cpp
+++ b/llvm/unittests/IR/ShuffleVectorInstTest.cpp
@@ -116,4 +116,33 @@ TEST(ShuffleVectorInst, isOneUseSingleSourceMask) {
ShuffleVectorInst::isOneUseSingleSourceMask({0, 1, 2, 3, 3, 3, 1, 0}, 4));
}
+TEST(ShuffleVectorInst, isInterleaveMask) {
+ SmallVector<unsigned> StartIndexes;
+ ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({0, 4, 1, 5, 2, 6, 3, 7}, 2,
+ 8, StartIndexes));
+ ASSERT_EQ(StartIndexes, SmallVector<unsigned>({0, 4}));
+
+ ASSERT_FALSE(
+ ShuffleVectorInst::isInterleaveMask({0, 4, 1, 6, 2, 6, 3, 7}, 2, 8));
+
+ ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, 0, 5, 1, 6, 2, 7, 3}, 2,
+ 8, StartIndexes));
+ ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 0}));
+
+ ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, 0, -1, 1, -1, 2, 7, 3}, 2,
+ 8, StartIndexes));
+ ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 0}));
+
+ ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({0, 2, 4, 1, 3, 5}, 3, 6,
+ StartIndexes));
+ ASSERT_EQ(StartIndexes, SmallVector<unsigned>({0, 2, 4}));
+
+ ASSERT_TRUE(ShuffleVectorInst::isInterleaveMask({4, -1, 0, 5, 3, 1}, 3, 6,
+ StartIndexes));
+ ASSERT_EQ(StartIndexes, SmallVector<unsigned>({4, 2, 0}));
+
+ ASSERT_FALSE(
+ ShuffleVectorInst::isInterleaveMask({8, 2, 12, 4, 9, 3, 13, 5}, 4, 8));
+}
+
} // end anonymous namespace
More information about the llvm-commits mailing list