[llvm] 1e3536e - [SLP]Fix PR108620: Need to check, if the reduced value was transformed

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 13 15:51:17 PDT 2024


Author: Alexey Bataev
Date: 2024-09-13T15:43:06-07:00
New Revision: 1e3536ef3141d6429f3616af624b81b6d6ab2959

URL: https://github.com/llvm/llvm-project/commit/1e3536ef3141d6429f3616af624b81b6d6ab2959
DIFF: https://github.com/llvm/llvm-project/commit/1e3536ef3141d6429f3616af624b81b6d6ab2959.diff

LOG: [SLP]Fix PR108620: Need to check, if the reduced value was transformed

Before trying to include the scalar into the list of
ExternallyUsedValues, need to check, if it was transformed in previous
iteration and use the transformed value, not the original one, to avoid
compiler crash when building external uses.

Fixes https://github.com/llvm/llvm-project/issues/108620

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0afe02fc08ff21..5f2bf082fb87f0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1283,11 +1283,8 @@ class BoUpSLP {
   /// Vectorize the tree but with the list of externally used values \p
   /// ExternallyUsedValues. Values in this MapVector can be replaced but the
   /// generated extractvalue instructions.
-  /// \param ReplacedExternals containd list of replaced external values
-  /// {scalar, replace} after emitting extractelement for external uses.
   Value *
   vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
-                SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
                 Instruction *ReductionRoot = nullptr);
 
   /// \returns the cost incurred by unwanted spills and fills, caused by
@@ -14222,14 +14219,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
 
 Value *BoUpSLP::vectorizeTree() {
   ExtraValueToDebugLocsMap ExternallyUsedValues;
-  SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
-  return vectorizeTree(ExternallyUsedValues, ReplacedExternals);
+  return vectorizeTree(ExternallyUsedValues);
 }
 
-Value *BoUpSLP::vectorizeTree(
-    const ExtraValueToDebugLocsMap &ExternallyUsedValues,
-    SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
-    Instruction *ReductionRoot) {
+Value *
+BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
+                       Instruction *ReductionRoot) {
   // All blocks must be scheduled before any instructions are inserted.
   for (auto &BSIter : BlocksSchedules) {
     scheduleBlock(BSIter.second.get());
@@ -14373,6 +14368,7 @@ Value *BoUpSLP::vectorizeTree(
   SmallDenseSet<Value *, 4> UsedInserts;
   DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
   SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
+  SmallDenseSet<ExtractElementInst *, 4> IgnoredExtracts;
   // Extract all of the elements with the external uses.
   for (const auto &ExternalUse : ExternalUses) {
     Value *Scalar = ExternalUse.Scalar;
@@ -14426,11 +14422,16 @@ Value *BoUpSLP::vectorizeTree(
           if (ReplaceInst) {
             // Leave the instruction as is, if it cheaper extracts and all
             // operands are scalar.
-            auto *CloneInst = Inst->clone();
-            CloneInst->insertBefore(Inst);
-            if (Inst->hasName())
-              CloneInst->takeName(Inst);
-            Ex = CloneInst;
+            if (auto *EE = dyn_cast<ExtractElementInst>(Inst)) {
+              IgnoredExtracts.insert(EE);
+              Ex = EE;
+            } else {
+              auto *CloneInst = Inst->clone();
+              CloneInst->insertBefore(Inst);
+              if (Inst->hasName())
+                CloneInst->takeName(Inst);
+              Ex = CloneInst;
+            }
           } else if (auto *ES = dyn_cast<ExtractElementInst>(Scalar);
                      ES && isa<Instruction>(Vec)) {
             Value *V = ES->getVectorOperand();
@@ -14530,8 +14531,12 @@ Value *BoUpSLP::vectorizeTree(
       }
       Value *NewInst = ExtractAndExtendIfNeeded(Vec);
       // Required to update internally referenced instructions.
-      Scalar->replaceAllUsesWith(NewInst);
-      ReplacedExternals.emplace_back(Scalar, NewInst);
+      if (Scalar != NewInst) {
+        assert((!isa<ExtractElementInst>(Scalar) ||
+                !IgnoredExtracts.contains(cast<ExtractElementInst>(Scalar))) &&
+               "Extractelements should not be replaced.");
+        Scalar->replaceAllUsesWith(NewInst);
+      }
       continue;
     }
 
@@ -14757,6 +14762,9 @@ Value *BoUpSLP::vectorizeTree(
       if (Entry->getOpcode() == Instruction::GetElementPtr &&
           !isa<GetElementPtrInst>(Scalar))
         continue;
+      if (auto *EE = dyn_cast<ExtractElementInst>(Scalar);
+          EE && IgnoredExtracts.contains(EE))
+        continue;
 #ifndef NDEBUG
       Type *Ty = Scalar->getType();
       if (!Ty->isVoidTy()) {
@@ -17660,7 +17668,6 @@ class HorizontalReduction {
     // because of the vectorization.
     DenseMap<Value *, WeakTrackingVH> TrackedVals(ReducedVals.size() *
                                                   ReducedVals.front().size());
-    SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
 
     // The compare instruction of a min/max is the insertion point for new
     // instructions and may be replaced with a new compare instruction.
@@ -17956,6 +17963,8 @@ class HorizontalReduction {
           if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
             continue;
           Value *RdxVal = Candidates[Cnt];
+          if (auto It = TrackedVals.find(RdxVal); It != TrackedVals.end())
+            RdxVal = It->second;
           if (!Visited.insert(RdxVal).second)
             continue;
           // Check if the scalar was vectorized as part of the vectorization
@@ -18024,8 +18033,8 @@ class HorizontalReduction {
           InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
 
         // Vectorize a tree.
-        Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues,
-                                                ReplacedExternals, InsertPt);
+        Value *VectorizedRoot =
+            V.vectorizeTree(LocalExternallyUsedValues, InsertPt);
 
         Builder.SetInsertPoint(InsertPt);
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll
new file mode 100644
index 00000000000000..bb7964146c44d2
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(i32 %arg) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP6:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[OP_RDX4:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[ADD17:%.*]] = add i32 [[PHI]], 0
+; CHECK-NEXT:    [[ADD18:%.*]] = add i32 [[PHI2]], 0
+; CHECK-NEXT:    [[ADD19:%.*]] = add i32 [[PHI2]], 0
+; CHECK-NEXT:    [[ADD23:%.*]] = add i32 [[PHI2]], 0
+; CHECK-NEXT:    [[TMP4]] = add <2 x i32> [[TMP0]], <i32 0, i32 1>
+; CHECK-NEXT:    [[TMP5]] = extractelement <2 x i32> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP6]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> [[TMP3]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i32 [[TMP7]], [[ADD18]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = xor i32 [[ADD17]], [[ADD19]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = xor i32 [[ADD23]], [[TMP6]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = xor i32 [[OP_RDX]], [[OP_RDX1]]
+; CHECK-NEXT:    [[OP_RDX4]] = xor i32 [[OP_RDX3]], [[OP_RDX2]]
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ult i32 [[TMP5]], 0
+; CHECK-NEXT:    br label %[[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:
+  %phi = phi i32 [ 0, %bb ], [ %add27, %bb1 ]
+  %phi2 = phi i32 [ 0, %bb ], [ %add24, %bb1 ]
+  %phi3 = phi i32 [ 0, %bb ], [ %xor26, %bb1 ]
+  %add = add i32 %phi2, 0
+  %add4 = add i32 %phi2, 0
+  %xor = xor i32 %add, %add4
+  %add5 = add i32 %phi, 0
+  %add6 = add i32 %phi2, 0
+  %add7 = add i32 %phi2, 0
+  %xor8 = xor i32 %add6, %xor
+  %xor9 = xor i32 %xor8, %add5
+  %xor10 = xor i32 %xor9, %add7
+  %add11 = add i32 %phi, 0
+  %add12 = add i32 %phi2, 0
+  %add13 = add i32 %phi2, 0
+  %xor14 = xor i32 %add12, %xor10
+  %xor15 = xor i32 %xor14, %add11
+  %xor16 = xor i32 %xor15, %add13
+  %add17 = add i32 %phi, 0
+  %add18 = add i32 %phi2, 0
+  %add19 = add i32 %phi2, 0
+  %xor20 = xor i32 %add18, %xor16
+  %xor21 = xor i32 %xor20, %add17
+  %xor22 = xor i32 %xor21, %add19
+  %add23 = add i32 %phi2, 0
+  %add24 = add i32 %arg, 0
+  %xor25 = xor i32 %add23, %xor22
+  %xor26 = xor i32 %xor25, %add24
+  %add27 = add i32 1, 0
+  %icmp = icmp ult i32 %add27, 0
+  br label %bb1
+}


        


More information about the llvm-commits mailing list