[llvm] 60ac347 - [SLP][REVEC] Make getAltInstrMask and getGatherCost vectorize vector instructions. (#99461)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 19:39:04 PDT 2024
Author: Han-Kuan Chen
Date: 2024-08-08T10:39:01+08:00
New Revision: 60ac34701e5c50955232d6d289b04779f4ef9912
URL: https://github.com/llvm/llvm-project/commit/60ac34701e5c50955232d6d289b04779f4ef9912
DIFF: https://github.com/llvm/llvm-project/commit/60ac34701e5c50955232d6d289b04779f4ef9912.diff
LOG: [SLP][REVEC] Make getAltInstrMask and getGatherCost vectorize vector instructions. (#99461)
Added:
llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1dc291ebddc0d..7619e744f7a2f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1047,10 +1047,13 @@ static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
/// Opcode1.
SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
unsigned Opcode1) {
- SmallBitVector OpcodeMask(VL.size(), false);
+ Type *ScalarTy = VL[0]->getType();
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ SmallBitVector OpcodeMask(VL.size() * ScalarTyNumElements, false);
for (unsigned Lane : seq<unsigned>(VL.size()))
if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
- OpcodeMask.set(Lane);
+ OpcodeMask.set(Lane * ScalarTyNumElements,
+ Lane * ScalarTyNumElements + ScalarTyNumElements);
return OpcodeMask;
}
@@ -11491,7 +11494,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
// Find the cost of inserting/extracting values from the vector.
// Check if the same elements are inserted several times and count them as
// shuffle candidates.
- APInt ShuffledElements = APInt::getZero(VL.size());
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ APInt ShuffledElements = APInt::getZero(VecTy->getNumElements());
DenseMap<Value *, unsigned> UniqueElements;
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost;
@@ -11511,7 +11515,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
Value *V = VL[I];
// No need to shuffle duplicates for constants.
if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
- ShuffledElements.setBit(I);
+ ShuffledElements.setBits(I * ScalarTyNumElements,
+ I * ScalarTyNumElements + ScalarTyNumElements);
ShuffleMask[I] = isa<PoisonValue>(V) ? PoisonMaskElem : I;
continue;
}
@@ -11524,7 +11529,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
}
DuplicateNonConst = true;
- ShuffledElements.setBit(I);
+ ShuffledElements.setBits(I * ScalarTyNumElements,
+ I * ScalarTyNumElements + ScalarTyNumElements);
ShuffleMask[I] = Res.first->second;
}
if (ForPoisonSrc)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
new file mode 100644
index 0000000000000..dfe9799b2a7de
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduction-logical.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -slp-revec -mtriple=x86_64 -S | FileCheck %s
+
+define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
+; CHECK-LABEL: @logical_and_icmp_diff_preds(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: ret i1 [[TMP7]]
+;
+ %x0 = extractelement <4 x i32> %x, i32 0
+ %x1 = extractelement <4 x i32> %x, i32 1
+ %x2 = extractelement <4 x i32> %x, i32 2
+ %x3 = extractelement <4 x i32> %x, i32 3
+ %c0 = icmp ult i32 %x0, 0
+ %c1 = icmp slt i32 %x1, 0
+ %c2 = icmp sgt i32 %x2, 0
+ %c3 = icmp slt i32 %x3, 0
+ %s1 = select i1 %c0, i1 %c1, i1 false
+ %s2 = select i1 %s1, i1 %c2, i1 false
+ %s3 = select i1 %s2, i1 %c3, i1 false
+ ret i1 %s3
+}
+
+define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
+; CHECK-LABEL: @logical_and_icmp_clamp(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %x0 = extractelement <4 x i32> %x, i32 0
+ %x1 = extractelement <4 x i32> %x, i32 1
+ %x2 = extractelement <4 x i32> %x, i32 2
+ %x3 = extractelement <4 x i32> %x, i32 3
+ %c0 = icmp slt i32 %x0, 42
+ %c1 = icmp slt i32 %x1, 42
+ %c2 = icmp slt i32 %x2, 42
+ %c3 = icmp slt i32 %x3, 42
+ %d0 = icmp sgt i32 %x0, 17
+ %d1 = icmp sgt i32 %x1, 17
+ %d2 = icmp sgt i32 %x2, 17
+ %d3 = icmp sgt i32 %x3, 17
+ %s1 = select i1 %c0, i1 %c1, i1 false
+ %s2 = select i1 %s1, i1 %c2, i1 false
+ %s3 = select i1 %s2, i1 %c3, i1 false
+ %s4 = select i1 %s3, i1 %d0, i1 false
+ %s5 = select i1 %s4, i1 %d1, i1 false
+ %s6 = select i1 %s5, i1 %d2, i1 false
+ %s7 = select i1 %s6, i1 %d3, i1 false
+ ret i1 %s7
+}
More information about the llvm-commits mailing list