[llvm] [SLP][REVEC] Fix scalar mask is passed to getScalarizationOverhead but the type is vector. (PR #128476)

Han-Kuan Chen via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 06:36:08 PST 2025


https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/128476

>From f4ff7fd33af9c9805b1045162ab750b8bc8ee420 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 23 Feb 2025 21:19:45 -0800
Subject: [PATCH 1/3] [SLP][REVEC] Pre-commit test.

---
 .../SLPVectorizer/SystemZ/revec-fix-128169.ll | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
new file mode 100644
index 0000000000000..08a0d6ade621f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=arch15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+define void @e(<4 x i16> %0) {
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %vec.ind = phi <4 x i16> [ zeroinitializer, %entry ], [ zeroinitializer, %vector.body ]
+  %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %13, %vector.body ]
+  %1 = icmp sgt <4 x i16> %vec.ind, zeroinitializer
+  %2 = zext <4 x i1> %1 to <4 x i32>
+  %3 = add <4 x i16> %vec.ind, zeroinitializer
+  %4 = icmp sgt <4 x i16> %0, zeroinitializer
+  %5 = zext <4 x i1> %4 to <4 x i32>
+  %6 = or <4 x i32> %2, %5
+  %7 = add <4 x i16> zeroinitializer, zeroinitializer
+  %8 = icmp sgt <4 x i16> %3, zeroinitializer
+  %9 = zext <4 x i1> %8 to <4 x i32>
+  %10 = or <4 x i32> %6, %9
+  %11 = icmp sgt <4 x i16> %7, zeroinitializer
+  %12 = zext <4 x i1> %11 to <4 x i32>
+  %13 = or <4 x i32> %10, %12
+  br label %vector.body
+}

>From eae9ade743e63e2d7cbc8a2c332d01e995d8c93d Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sun, 23 Feb 2025 22:15:33 -0800
Subject: [PATCH 2/3] [SLP][REVEC] Fix scalar mask is passed to
 getScalarizationOverhead but the type is vector.

This fixes the "Vector size mismatch" assertion failure.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 24 +++++++++++++++----
 .../SLPVectorizer/SystemZ/revec-fix-128169.ll | 20 ++++++++++++++++
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf256d82ae17d..181fee5adbd10 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13963,15 +13963,31 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
     ShuffledElements.setBit(I);
     ShuffleMask[I] = Res.first->second;
   }
-  if (!DemandedElements.isZero())
-    Cost +=
-        TTI->getScalarizationOverhead(VecTy, DemandedElements, /*Insert=*/true,
-                                      /*Extract=*/false, CostKind, VL);
+  if (!DemandedElements.isZero()) {
+    if (isa<FixedVectorType>(ScalarTy)) {
+      assert(SLPReVec && "Only supported by REVEC.");
+      // We don't need to insert elements one by one. Instead, we can insert the
+      // entire vector into the destination.
+      Cost = 0;
+      unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+      for (unsigned I : seq<unsigned>(VL.size()))
+        if (DemandedElements[I])
+          Cost += TTI->getShuffleCost(
+              TTI::SK_InsertSubvector, VecTy, std::nullopt, CostKind,
+              I * ScalarTyNumElements, cast<FixedVectorType>(ScalarTy));
+    } else {
+      Cost += TTI->getScalarizationOverhead(VecTy, DemandedElements,
+                                            /*Insert=*/true,
+                                            /*Extract=*/false, CostKind, VL);
+    }
+  }
   if (ForPoisonSrc) {
     if (isa<FixedVectorType>(ScalarTy)) {
       assert(SLPReVec && "Only supported by REVEC.");
       // We don't need to insert elements one by one. Instead, we can insert the
       // entire vector into the destination.
+      assert(DemandedElements.isZero() &&
+             "Need to consider the cost from DemandedElements.");
       Cost = 0;
       unsigned ScalarTyNumElements = getNumElements(ScalarTy);
       for (unsigned I : seq<unsigned>(VL.size()))
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
index 08a0d6ade621f..be42207e207a0 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
@@ -2,6 +2,26 @@
 ; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=arch15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
 
 define void @e(<4 x i16> %0) {
+; CHECK-LABEL: @e(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i16> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i16> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i16> [[TMP0:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = or <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i16> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp sgt <4 x i16> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = zext <4 x i1> [[TMP10]] to <4 x i32>
+; CHECK-NEXT:    [[TMP12]] = or <4 x i32> [[TMP9]], [[TMP11]]
+; CHECK-NEXT:    br label [[VECTOR_BODY]]
+;
 entry:
   br label %vector.body
 

>From 0f07caa6db81e6c0651ebb47f78162e50f7f1f5f Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 24 Feb 2025 06:35:24 -0800
Subject: [PATCH 3/3] apply comment

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 181fee5adbd10..c87bdf309907b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13972,9 +13972,9 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
       unsigned ScalarTyNumElements = getNumElements(ScalarTy);
       for (unsigned I : seq<unsigned>(VL.size()))
         if (DemandedElements[I])
-          Cost += TTI->getShuffleCost(
-              TTI::SK_InsertSubvector, VecTy, std::nullopt, CostKind,
-              I * ScalarTyNumElements, cast<FixedVectorType>(ScalarTy));
+          Cost += ::getShuffleCost(*TTI, TTI::SK_InsertSubvector, VecTy, {},
+                                   CostKind, I * ScalarTyNumElements,
+                                   cast<FixedVectorType>(ScalarTy));
     } else {
       Cost += TTI->getScalarizationOverhead(VecTy, DemandedElements,
                                             /*Insert=*/true,



More information about the llvm-commits mailing list