[llvm] [SLP][REVEC] getScalarizationOverhead should not be used when ScalarTy is FixedVectorType. (PR #117536)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 25 02:29:59 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Han-Kuan Chen (HanKuanChen)

<details>
<summary>Changes</summary>

Reference issue: https://github.com/llvm/llvm-project/issues/117393

---
Full diff: https://github.com/llvm/llvm-project/pull/117536.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+14-2) 
- (added) llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-117393.ll (+30) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d033b7c2ef4a92..f208ad9c9e1c38 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9616,8 +9616,20 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) {
     Cost += ::getShuffleCost(*TTI, TTI::SK_InsertSubvector, VecTy, {}, CostKind,
                              Idx, getWidenedType(ScalarTy, Sz));
   }
-  Cost += TTI->getScalarizationOverhead(VecTy, DemandedElts, /*Insert=*/true,
-                                        /*Extract=*/false, CostKind);
+  if (isa<FixedVectorType>(ScalarTy)) {
+    assert(SLPReVec && "Only supported by REVEC.");
+    // If ScalarTy is FixedVectorType, we should use CreateInsertVector instead
+    // of CreateInsertElement.
+    unsigned ScalarTyNumElements = getNumElements(ScalarTy);
+    for (unsigned I : seq<unsigned>(TE.Scalars.size()))
+      if (DemandedElts[I])
+        Cost += TTI->getShuffleCost(
+            TTI::SK_InsertSubvector, VecTy, std::nullopt, CostKind,
+            I * ScalarTyNumElements, cast<FixedVectorType>(ScalarTy));
+  } else {
+    Cost += TTI->getScalarizationOverhead(VecTy, DemandedElts, /*Insert=*/true,
+                                          /*Extract=*/false, CostKind);
+  }
   int Sz = TE.Scalars.size();
   SmallVector<int> ReorderMask(TE.ReorderIndices.begin(),
                                TE.ReorderIndices.end());
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-117393.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-117393.ll
new file mode 100644
index 00000000000000..c40e32baad7b31
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-117393.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=systemz-unknown -mcpu=z15 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+define void @h() {
+; CHECK-LABEL: @h(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> splat (i32 1), zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i32> [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> zeroinitializer, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i32> [[TMP4]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP7]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = shl <4 x i32> zeroinitializer, zeroinitializer
+  %1 = or <4 x i32> %0, zeroinitializer
+  %2 = or <4 x i32> splat (i32 1), zeroinitializer
+  %3 = or <4 x i32> zeroinitializer, zeroinitializer
+  %4 = shl <4 x i32> zeroinitializer, zeroinitializer
+  %5 = or <4 x i32> %4, zeroinitializer
+  %6 = and <4 x i32> %2, %1
+  %7 = and <4 x i32> %3, %6
+  %8 = and <4 x i32> %5, %7
+  %9 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %8)
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/117536


More information about the llvm-commits mailing list