[llvm] [SLP] Cache users-scalar pairs during tree vectorization (PR #96065)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 19 05:50:32 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Maurice Heumann (momo5502)

<details>
<summary>Changes</summary>

This fixes issue #95925.

When only the user is cached during tree vectorization, an insertelement instruction that uses several different scalars is skipped after its first scalar has been replaced, leaving its other scalar uses unprocessed. Caching the processed scalar together with the user (as a user-scalar pair) prevents this.

---
Full diff: https://github.com/llvm/llvm-project/pull/96065.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+2-2) 
- (added) llvm/test/Transforms/SLPVectorizer/insertelement-all-operands.ll (+42) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ae0819c964bef..ead1363036466 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13631,7 +13631,7 @@ Value *BoUpSLP::vectorizeTree(
   DenseMap<Value *,
            DenseMap<BasicBlock *, std::pair<Instruction *, Instruction *>>>
       ScalarToEEs;
-  SmallDenseSet<Value *, 4> UsedInserts;
+  SmallDenseSet<std::pair<Value *, Value *>, 4> UsedInserts;
   DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
   SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
   // Extract all of the elements with the external uses.
@@ -13775,7 +13775,7 @@ Value *BoUpSLP::vectorizeTree(
       // Skip if the scalar is another vector op or Vec is not an instruction.
       if (!Scalar->getType()->isVectorTy() && isa<Instruction>(Vec)) {
         if (auto *FTy = dyn_cast<FixedVectorType>(User->getType())) {
-          if (!UsedInserts.insert(VU).second)
+          if (!UsedInserts.insert({VU, Scalar}).second)
             continue;
           // Need to use original vector, if the root is truncated.
           auto BWIt = MinBWs.find(E);
diff --git a/llvm/test/Transforms/SLPVectorizer/insertelement-all-operands.ll b/llvm/test/Transforms/SLPVectorizer/insertelement-all-operands.ll
new file mode 100644
index 0000000000000..56023af24693e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/insertelement-all-operands.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s
+
+define fastcc ptr @"testfunc"(ptr %0) {
+; CHECK-LABEL: define fastcc ptr @testfunc(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> <ptr null, ptr poison>, ptr [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i32>
+; CHECK-NEXT:    switch i32 0, label %[[NEWFUNCROOT994:.*]] [
+; CHECK-NEXT:      i32 1, label %[[NEWFUNCROOT994]]
+; CHECK-NEXT:      i32 0, label %[[NEWFUNCROOT584:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[NEWFUNCROOT584]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    ret ptr null
+; CHECK:       [[NEWFUNCROOT994]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[TMP2]], i64 [[TMP3]]
+; CHECK-NEXT:    ret ptr null
+;
+entry:
+  %1 = ptrtoint ptr %0 to i64
+  %2 = trunc i64 %1 to i32
+  %3 = ptrtoint ptr null to i64
+  %4 = trunc i64 %3 to i32
+  switch i32 0, label %newFuncRoot994 [
+  i32 1, label %newFuncRoot994
+  i32 0, label %newFuncRoot584
+  ]
+
+newFuncRoot584:                                   ; preds = %entry
+  %5 = insertelement <4 x i32> poison, i32 %4, i64 0
+  %6 = insertelement <4 x i32> %5, i32 %2, i64 1
+  ret ptr null
+
+newFuncRoot994:                                   ; preds = %entry, %entry
+  %7 = insertelement <4 x i32> zeroinitializer, i32 %2, i64 %3
+  ret ptr null
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/96065


More information about the llvm-commits mailing list