[llvm] 3a6108b - [SLP][REVEC] Fix a scalar mask being passed to getScalarizationOverhead when the type is a vector. (#128476)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 07:43:31 PST 2025
Author: Han-Kuan Chen
Date: 2025-02-24T23:43:27+08:00
New Revision: 3a6108bcac26016b791cabce86424c1f1dcf3056
URL: https://github.com/llvm/llvm-project/commit/3a6108bcac26016b791cabce86424c1f1dcf3056
DIFF: https://github.com/llvm/llvm-project/commit/3a6108bcac26016b791cabce86424c1f1dcf3056.diff
LOG: [SLP][REVEC] Fix a scalar mask being passed to getScalarizationOverhead when the type is a vector. (#128476)
Fixes the "Vector size mismatch" error.
Added:
llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5fc5fb10fad55..3d660b63309d4 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13968,15 +13968,31 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
ShuffledElements.setBit(I);
ShuffleMask[I] = Res.first->second;
}
- if (!DemandedElements.isZero())
- Cost +=
- TTI->getScalarizationOverhead(VecTy, DemandedElements, /*Insert=*/true,
- /*Extract=*/false, CostKind, VL);
+ if (!DemandedElements.isZero()) {
+ if (isa<FixedVectorType>(ScalarTy)) {
+ assert(SLPReVec && "Only supported by REVEC.");
+ // We don't need to insert elements one by one. Instead, we can insert the
+ // entire vector into the destination.
+ Cost = 0;
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ for (unsigned I : seq<unsigned>(VL.size()))
+ if (DemandedElements[I])
+ Cost += ::getShuffleCost(*TTI, TTI::SK_InsertSubvector, VecTy, {},
+ CostKind, I * ScalarTyNumElements,
+ cast<FixedVectorType>(ScalarTy));
+ } else {
+ Cost += TTI->getScalarizationOverhead(VecTy, DemandedElements,
+ /*Insert=*/true,
+ /*Extract=*/false, CostKind, VL);
+ }
+ }
if (ForPoisonSrc) {
if (isa<FixedVectorType>(ScalarTy)) {
assert(SLPReVec && "Only supported by REVEC.");
// We don't need to insert elements one by one. Instead, we can insert the
// entire vector into the destination.
+ assert(DemandedElements.isZero() &&
+ "Need to consider the cost from DemandedElements.");
Cost = 0;
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
for (unsigned I : seq<unsigned>(VL.size()))
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
new file mode 100644
index 0000000000000..be42207e207a0
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=arch15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+define void @e(<4 x i16> %0) {
+; CHECK-LABEL: @e(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i16> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i16> [[TMP0:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i16> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i16> zeroinitializer, zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32>
+; CHECK-NEXT: [[TMP12]] = or <4 x i32> [[TMP9]], [[TMP11]]
+; CHECK-NEXT: br label [[VECTOR_BODY]]
+;
+entry:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %vec.ind = phi <4 x i16> [ zeroinitializer, %entry ], [ zeroinitializer, %vector.body ]
+ %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %13, %vector.body ]
+ %1 = icmp sgt <4 x i16> %vec.ind, zeroinitializer
+ %2 = zext <4 x i1> %1 to <4 x i32>
+ %3 = add <4 x i16> %vec.ind, zeroinitializer
+ %4 = icmp sgt <4 x i16> %0, zeroinitializer
+ %5 = zext <4 x i1> %4 to <4 x i32>
+ %6 = or <4 x i32> %2, %5
+ %7 = add <4 x i16> zeroinitializer, zeroinitializer
+ %8 = icmp sgt <4 x i16> %3, zeroinitializer
+ %9 = zext <4 x i1> %8 to <4 x i32>
+ %10 = or <4 x i32> %6, %9
+ %11 = icmp sgt <4 x i16> %7, zeroinitializer
+ %12 = zext <4 x i1> %11 to <4 x i32>
+ %13 = or <4 x i32> %10, %12
+ br label %vector.body
+}
More information about the llvm-commits mailing list