[llvm] c1bcf5d - [SLP]Fix PR61835: Assertion `I->use_empty() && "trying to erase

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 31 14:23:58 PDT 2023


Author: Alexey Bataev
Date: 2023-03-31T14:21:19-07:00
New Revision: c1bcf5dd0a24316c0451dff233b80527fc967c6b

URL: https://github.com/llvm/llvm-project/commit/c1bcf5dd0a24316c0451dff233b80527fc967c6b
DIFF: https://github.com/llvm/llvm-project/commit/c1bcf5dd0a24316c0451dff233b80527fc967c6b.diff

LOG: [SLP]Fix PR61835: Assertion `I->use_empty() && "trying to erase
instruction with users."' failed.

If the externally used scalar is part of the tree and is replaced by an
extractelement instruction, we need to add the generated extractelement
instruction to the list of ExternallyUsedValues to avoid its deletion
during vectorization.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c763f67713fda..570a8efe78a74 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1124,8 +1124,12 @@ class BoUpSLP {
   /// Vectorize the tree but with the list of externally used values \p
 /// ExternallyUsedValues. Values in this MapVector can be replaced by the
 /// generated extractelement instructions.
-  Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
-                       Instruction *ReductionRoot = nullptr);
+  /// \param ReplacedExternals contains the list of replaced external values
+  /// {scalar, replacement} after emitting extractelement for external uses.
+  Value *
+  vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
+                SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
+                Instruction *ReductionRoot = nullptr);
 
   /// \returns the cost incurred by unwanted spills and fills, caused by
   /// holding live values over call sites.
@@ -10367,7 +10371,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
 
 Value *BoUpSLP::vectorizeTree() {
   ExtraValueToDebugLocsMap ExternallyUsedValues;
-  return vectorizeTree(ExternallyUsedValues);
+  SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
+  return vectorizeTree(ExternallyUsedValues, ReplacedExternals);
 }
 
 namespace {
@@ -10381,8 +10386,10 @@ struct ShuffledInsertData {
 };
 } // namespace
 
-Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
-                              Instruction *ReductionRoot) {
+Value *BoUpSLP::vectorizeTree(
+    const ExtraValueToDebugLocsMap &ExternallyUsedValues,
+    SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
+    Instruction *ReductionRoot) {
   // All blocks must be scheduled before any instructions are inserted.
   for (auto &BSIter : BlocksSchedules) {
     scheduleBlock(BSIter.second.get());
@@ -10550,14 +10557,9 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
         Builder.SetInsertPoint(&F->getEntryBlock().front());
       }
       Value *NewInst = ExtractAndExtendIfNeeded(Vec);
-      auto &NewInstLocs = ExternallyUsedValues[NewInst];
-      auto It = ExternallyUsedValues.find(Scalar);
-      assert(It != ExternallyUsedValues.end() &&
-             "Externally used scalar is not found in ExternallyUsedValues");
-      NewInstLocs.append(It->second);
-      ExternallyUsedValues.erase(Scalar);
       // Required to update internally referenced instructions.
       Scalar->replaceAllUsesWith(NewInst);
+      ReplacedExternals.emplace_back(Scalar, NewInst);
       continue;
     }
 
@@ -12971,6 +12973,7 @@ class HorizontalReduction {
     DenseMap<Value *, WeakTrackingVH> TrackedVals(
         ReducedVals.size() * ReducedVals.front().size() + ExtraArgs.size());
     BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
+    SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
     ExternallyUsedValues.reserve(ExtraArgs.size() + 1);
     // The same extra argument may be used several times, so log each attempt
     // to use it.
@@ -13262,6 +13265,14 @@ class HorizontalReduction {
         for (Value *RdxVal : VL)
           if (RequiredExtract.contains(RdxVal))
             LocalExternallyUsedValues[RdxVal];
+        // Update LocalExternallyUsedValues for the scalar, replaced by
+        // extractelement instructions.
+        for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) {
+          auto It = ExternallyUsedValues.find(Pair.first);
+          if (It == ExternallyUsedValues.end())
+            continue;
+          LocalExternallyUsedValues[Pair.second].append(It->second);
+        }
         V.buildExternalUses(LocalExternallyUsedValues);
 
         V.computeMinimumValueSizes();
@@ -13331,8 +13342,8 @@ class HorizontalReduction {
           InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
 
         // Vectorize a tree.
-        Value *VectorizedRoot =
-            V.vectorizeTree(LocalExternallyUsedValues, InsertPt);
+        Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues,
+                                                ReplacedExternals, InsertPt);
 
         Builder.SetInsertPoint(InsertPt);
 

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll
new file mode 100644
index 0000000000000..6443de6af6c65
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 7
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr [[IDX2]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr [[IDX2]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i64> [[TMP1]], i32 7
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr null, align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i64> [[TMP4]], i64 [[TMP3]], i32 0
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX25:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = phi <8 x i64> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[TMP8:%.*]] = add <8 x i64> [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP7]])
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP8]])
+; CHECK-NEXT:    [[OP_RDX24:%.*]] = add i64 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[OP_RDX25]] = add i64 [[OP_RDX24]], [[TMP2]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  %idx1 = getelementptr [1000 x i64], ptr null, i64 0, i64 9
+  %idx2 = getelementptr [1000 x i64], ptr null, i64 0, i64 7
+  %ld1 = load i64, ptr %idx2, align 8
+  %idx3 = getelementptr [1000 x i64], ptr null, i64 0, i64 8
+  %ld2 = load i64, ptr %idx3, align 16
+  %ld3 = load i64, ptr %idx1, align 8
+  %idx4 = getelementptr [1000 x i64], ptr null, i64 0, i64 10
+  %ld4 = load i64, ptr %idx4, align 16
+  %idx5 = getelementptr [1000 x i64], ptr null, i64 0, i64 11
+  %ld5 = load i64, ptr %idx5, align 8
+  %idx6 = getelementptr [1000 x i64], ptr null, i64 0, i64 12
+  %ld6 = load i64, ptr %idx6, align 16
+  %idx7 = getelementptr [1000 x i64], ptr null, i64 0, i64 13
+  %ld7 = load i64, ptr %idx7, align 8
+  %idx8 = getelementptr [1000 x i64], ptr null, i64 0, i64 14
+  %ld8 = load i64, ptr %idx8, align 16
+  %0 = load i64, ptr %idx2, align 8
+  %1 = load i64, ptr %idx3, align 16
+  %2 = load i64, ptr %idx1, align 8
+  %3 = load i64, ptr %idx4, align 16
+  %4 = load i64, ptr %idx5, align 8
+  %5 = load i64, ptr %idx6, align 16
+  %6 = load i64, ptr %idx7, align 8
+  %7 = load i64, ptr %idx8, align 16
+  %8 = load i64, ptr null, align 8
+  br label %loop
+
+loop:                 ; preds = %loop, %entry
+  %9 = phi i64 [ %ld8, %entry ], [ %7, %loop ]
+  %10 = phi i64 [ %ld7, %entry ], [ %6, %loop ]
+  %11 = phi i64 [ %ld6, %entry ], [ %5, %loop ]
+  %12 = phi i64 [ %ld5, %entry ], [ %4, %loop ]
+  %13 = phi i64 [ %ld4, %entry ], [ %3, %loop ]
+  %14 = phi i64 [ %ld3, %entry ], [ %2, %loop ]
+  %15 = phi i64 [ %ld2, %entry ], [ %1, %loop ]
+  %16 = phi i64 [ %ld1, %entry ], [ %0, %loop ]
+  %phi1 = phi i64 [ 0, %entry ], [ %64, %loop ]
+  %17 = add i64 %16, %15
+  %18 = add i64 %17, %14
+  %19 = add i64 %18, %13
+  %20 = add i64 %19, %12
+  %21 = add i64 %20, %11
+  %22 = add i64 %21, %10
+  %23 = add i64 %22, %9
+  %24 = add i64 %23, %16
+  %25 = add i64 %24, %15
+  %26 = add i64 %25, %14
+  %27 = add i64 %26, %13
+  %28 = add i64 %27, %12
+  %29 = add i64 %28, %11
+  %30 = add i64 %29, %10
+  %31 = add i64 %30, %9
+  %32 = add i64 %31, %16
+  %33 = add i64 %32, %15
+  %34 = add i64 %33, %14
+  %35 = add i64 %34, %13
+  %36 = add i64 %35, %12
+  %37 = add i64 %36, %11
+  %38 = add i64 %37, %10
+  %39 = add i64 %38, %9
+  %40 = add i64 %39, %16
+  %41 = add i64 %40, %15
+  %42 = add i64 %41, %14
+  %43 = add i64 %42, %13
+  %44 = add i64 %43, %12
+  %45 = add i64 %44, %11
+  %46 = add i64 %45, %10
+  %47 = add i64 %46, %9
+  %48 = add i64 %47, %0
+  %49 = add i64 %48, %1
+  %50 = add i64 %49, %2
+  %51 = add i64 %50, %3
+  %52 = add i64 %51, %4
+  %53 = add i64 %52, %5
+  %54 = add i64 %53, %6
+  %55 = add i64 %54, %7
+  %56 = add i64 %55, %8
+  %57 = add i64 %56, %0
+  %58 = add i64 %57, %1
+  %59 = add i64 %58, %2
+  %60 = add i64 %59, %3
+  %61 = add i64 %60, %4
+  %62 = add i64 %61, %5
+  %63 = add i64 %62, %6
+  %64 = add i64 %63, %7
+  br label %loop
+}


        


More information about the llvm-commits mailing list