[llvm] [SLP][REVEC] Fix scalar mask is passed to getScalarizationOverhead but the type is vector. (PR #128476)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 06:36:08 PST 2025
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/128476
From f4ff7fd33af9c9805b1045162ab750b8bc8ee420 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 23 Feb 2025 21:19:45 -0800
Subject: [PATCH 1/3] [SLP][REVEC] Pre-commit test.
---
.../SLPVectorizer/SystemZ/revec-fix-128169.ll | 25 +++++++++++++++++++
1 file changed, 25 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
new file mode 100644
index 0000000000000..08a0d6ade621f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=arch15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+define void @e(<4 x i16> %0) {
+entry:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %vec.ind = phi <4 x i16> [ zeroinitializer, %entry ], [ zeroinitializer, %vector.body ]
+ %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %13, %vector.body ]
+ %1 = icmp sgt <4 x i16> %vec.ind, zeroinitializer
+ %2 = zext <4 x i1> %1 to <4 x i32>
+ %3 = add <4 x i16> %vec.ind, zeroinitializer
+ %4 = icmp sgt <4 x i16> %0, zeroinitializer
+ %5 = zext <4 x i1> %4 to <4 x i32>
+ %6 = or <4 x i32> %2, %5
+ %7 = add <4 x i16> zeroinitializer, zeroinitializer
+ %8 = icmp sgt <4 x i16> %3, zeroinitializer
+ %9 = zext <4 x i1> %8 to <4 x i32>
+ %10 = or <4 x i32> %6, %9
+ %11 = icmp sgt <4 x i16> %7, zeroinitializer
+ %12 = zext <4 x i1> %11 to <4 x i32>
+ %13 = or <4 x i32> %10, %12
+ br label %vector.body
+}
From eae9ade743e63e2d7cbc8a2c332d01e995d8c93d Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 23 Feb 2025 22:15:33 -0800
Subject: [PATCH 2/3] [SLP][REVEC] Fix scalar mask is passed to getScalarizationOverhead but the type is vector.

Fix "Vector size mismatch".
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 24 +++++++++++++++----
.../SLPVectorizer/SystemZ/revec-fix-128169.ll | 20 ++++++++++++++++
2 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf256d82ae17d..181fee5adbd10 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13963,15 +13963,31 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
ShuffledElements.setBit(I);
ShuffleMask[I] = Res.first->second;
}
- if (!DemandedElements.isZero())
- Cost +=
- TTI->getScalarizationOverhead(VecTy, DemandedElements, /*Insert=*/true,
- /*Extract=*/false, CostKind, VL);
+ if (!DemandedElements.isZero()) {
+ if (isa<FixedVectorType>(ScalarTy)) {
+ assert(SLPReVec && "Only supported by REVEC.");
+ // We don't need to insert elements one by one. Instead, we can insert the
+ // entire vector into the destination.
+ Cost = 0;
+ unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+ for (unsigned I : seq<unsigned>(VL.size()))
+ if (DemandedElements[I])
+ Cost += TTI->getShuffleCost(
+ TTI::SK_InsertSubvector, VecTy, std::nullopt, CostKind,
+ I * ScalarTyNumElements, cast<FixedVectorType>(ScalarTy));
+ } else {
+ Cost += TTI->getScalarizationOverhead(VecTy, DemandedElements,
+ /*Insert=*/true,
+ /*Extract=*/false, CostKind, VL);
+ }
+ }
if (ForPoisonSrc) {
if (isa<FixedVectorType>(ScalarTy)) {
assert(SLPReVec && "Only supported by REVEC.");
// We don't need to insert elements one by one. Instead, we can insert the
// entire vector into the destination.
+ assert(DemandedElements.isZero() &&
+ "Need to consider the cost from DemandedElements.");
Cost = 0;
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
for (unsigned I : seq<unsigned>(VL.size()))
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
index 08a0d6ade621f..be42207e207a0 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
@@ -2,6 +2,26 @@
; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=arch15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
define void @e(<4 x i16> %0) {
+; CHECK-LABEL: @e(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i16> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i16> [[TMP0:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i16> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i16> zeroinitializer, zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32>
+; CHECK-NEXT: [[TMP12]] = or <4 x i32> [[TMP9]], [[TMP11]]
+; CHECK-NEXT: br label [[VECTOR_BODY]]
+;
entry:
br label %vector.body
From 0f07caa6db81e6c0651ebb47f78162e50f7f1f5f Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 24 Feb 2025 06:35:24 -0800
Subject: [PATCH 3/3] apply comment
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 181fee5adbd10..c87bdf309907b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13972,9 +13972,9 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
for (unsigned I : seq<unsigned>(VL.size()))
if (DemandedElements[I])
- Cost += TTI->getShuffleCost(
- TTI::SK_InsertSubvector, VecTy, std::nullopt, CostKind,
- I * ScalarTyNumElements, cast<FixedVectorType>(ScalarTy));
+ Cost += ::getShuffleCost(*TTI, TTI::SK_InsertSubvector, VecTy, {},
+ CostKind, I * ScalarTyNumElements,
+ cast<FixedVectorType>(ScalarTy));
} else {
Cost += TTI->getScalarizationOverhead(VecTy, DemandedElements,
/*Insert=*/true,
More information about the llvm-commits mailing list