[llvm] 39bab1d - [SLP]Check if the operand for removal is the reduction operand, awaiting for the reduction

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 14:18:28 PST 2025


Author: Alexey Bataev
Date: 2025-02-26T14:17:11-08:00
New Revision: 39bab1de33333ee3c62b586c4e8d26f8c443bc60

URL: https://github.com/llvm/llvm-project/commit/39bab1de33333ee3c62b586c4e8d26f8c443bc60
DIFF: https://github.com/llvm/llvm-project/commit/39bab1de33333ee3c62b586c4e8d26f8c443bc60.diff

LOG: [SLP]Check if the operand for removal is the reduction operand, awaiting for the reduction

If an operand of the instruction being removed is a vector reduction value
that has not been reduced yet, it has no users and could therefore be
mistakenly treated as trivially dead and removed during operand analysis.
Such operands are now skipped.

Fixes #128736

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b25b09306aca8..e8c91ebd508ce 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1389,9 +1389,10 @@ class BoUpSLP {
   /// Vectorize the tree but with the list of externally used values \p
   /// ExternallyUsedValues. Values in this MapVector can be replaced but the
   /// generated extractvalue instructions.
-  Value *
-  vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
-                Instruction *ReductionRoot = nullptr);
+  Value *vectorizeTree(
+      const ExtraValueToDebugLocsMap &ExternallyUsedValues,
+      Instruction *ReductionRoot = nullptr,
+      ArrayRef<std::tuple<Value *, unsigned, bool>> VectorValuesAndScales = {});
 
   /// \returns the cost incurred by unwanted spills and fills, caused by
   /// holding live values over call sites.
@@ -2849,11 +2850,13 @@ class BoUpSLP {
   /// Remove instructions from the parent function and clear the operands of \p
   /// DeadVals instructions, marking for deletion trivially dead operands.
   template <typename T>
-  void removeInstructionsAndOperands(ArrayRef<T *> DeadVals) {
+  void removeInstructionsAndOperands(
+      ArrayRef<T *> DeadVals,
+      ArrayRef<std::tuple<Value *, unsigned, bool>> VectorValuesAndScales) {
     SmallVector<WeakTrackingVH> DeadInsts;
     for (T *V : DeadVals) {
       auto *I = cast<Instruction>(V);
-      DeletedInstructions.insert(I);
+      eraseInstruction(I);
     }
     DenseSet<Value *> Processed;
     for (T *V : DeadVals) {
@@ -2915,12 +2918,17 @@ class BoUpSLP {
         // loop iteration.
         if (auto *OpI = dyn_cast<Instruction>(OpV))
           if (!DeletedInstructions.contains(OpI) &&
+              (!OpI->getType()->isVectorTy() ||
+               none_of(VectorValuesAndScales,
+                       [&](const std::tuple<Value *, unsigned, bool> &V) {
+                         return std::get<0>(V) == OpI;
+                       })) &&
               isInstructionTriviallyDead(OpI, TLI))
             DeadInsts.push_back(OpI);
       }
 
       VI->removeFromParent();
-      DeletedInstructions.insert(VI);
+      eraseInstruction(VI);
       SE->forgetValue(VI);
     }
   }
@@ -16466,9 +16474,10 @@ Value *BoUpSLP::vectorizeTree() {
   return vectorizeTree(ExternallyUsedValues);
 }
 
-Value *
-BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
-                       Instruction *ReductionRoot) {
+Value *BoUpSLP::vectorizeTree(
+    const ExtraValueToDebugLocsMap &ExternallyUsedValues,
+    Instruction *ReductionRoot,
+    ArrayRef<std::tuple<Value *, unsigned, bool>> VectorValuesAndScales) {
   // All blocks must be scheduled before any instructions are inserted.
   for (auto &BSIter : BlocksSchedules) {
     scheduleBlock(BSIter.second.get());
@@ -17075,7 +17084,7 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
   // cache correctness.
   // NOTE: removeInstructionAndOperands only marks the instruction for deletion
   // - instructions are not deleted until later.
-  removeInstructionsAndOperands(ArrayRef(RemovedInsts));
+  removeInstructionsAndOperands(ArrayRef(RemovedInsts), VectorValuesAndScales);
 
   Builder.ClearInsertionPoint();
   InstrElementSize.clear();
@@ -20449,8 +20458,8 @@ class HorizontalReduction {
           InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
 
         // Vectorize a tree.
-        Value *VectorizedRoot =
-            V.vectorizeTree(LocalExternallyUsedValues, InsertPt);
+        Value *VectorizedRoot = V.vectorizeTree(
+            LocalExternallyUsedValues, InsertPt, VectorValuesAndScales);
         // Update TrackedToOrig mapping, since the tracked values might be
         // updated.
         for (Value *RdxVal : Candidates) {
@@ -20678,7 +20687,7 @@ class HorizontalReduction {
             Ignore->replaceAllUsesWith(P);
           }
         }
-        V.removeInstructionsAndOperands(RdxOps);
+        V.removeInstructionsAndOperands(RdxOps, VectorValuesAndScales);
       }
     } else if (!CheckForReusedReductionOps) {
       for (ReductionOpsType &RdxOps : ReductionOps)

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll
new file mode 100644
index 0000000000000..799533824c5aa
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver2 < %s | FileCheck %s
+
+define i32 @test(i32 %arg) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[ARG]], i32 1
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[OP_RDX:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> <i64 0, i64 0, i64 poison, i64 poison>, <2 x i64> zeroinitializer, i64 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i64> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i64> [[TMP2]] to <4 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <4 x i32> zeroinitializer, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> [[TMP0]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or <4 x i32> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP9:%.*]] = mul <2 x i32> zeroinitializer, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP11:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP7]], i64 0)
+; CHECK-NEXT:    [[RDX_OP:%.*]] = mul <4 x i32> [[TMP11]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[RDX_OP]], i64 0)
+; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP12]])
+; CHECK-NEXT:    [[OP_RDX]] = mul i32 0, [[TMP13]]
+; CHECK-NEXT:    br label %[[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:
+  %phi = phi i32 [ 0, %bb ], [ %mul37, %bb1 ]
+  %mul = mul i64 0, 0
+  %trunc = trunc i64 %mul to i32
+  %or = or i32 0, %trunc
+  %or2 = or i32 0, %or
+  %or3 = or i32 %or2, 0
+  %mul4 = mul i32 0, %or3
+  %mul5 = mul i32 %or3, 0
+  %mul6 = mul i32 %mul5, %mul4
+  %mul7 = mul i32 %mul6, %mul4
+  %mul8 = mul i32 %mul7, %or3
+  %mul9 = mul i64 0, 0
+  %trunc10 = trunc i64 %mul9 to i32
+  %or11 = or i32 0, %trunc10
+  %or12 = or i32 %arg, %or11
+  %or13 = or i32 %or12, 0
+  %mul14 = mul i32 %or13, %mul8
+  %mul15 = mul i32 %mul14, 0
+  %mul16 = mul i32 %mul15, 0
+  %mul17 = mul i32 %mul16, %or13
+  %shl = shl i64 0, 0
+  %mul18 = mul i64 %shl, 0
+  %trunc19 = trunc i64 %mul18 to i32
+  %or20 = or i32 0, %trunc19
+  %or21 = or i32 0, %or20
+  %or22 = or i32 %or21, 0
+  %mul23 = mul i32 %or22, %mul17
+  %mul24 = mul i32 %mul23, 0
+  %mul25 = mul i32 %mul24, 0
+  %mul26 = mul i32 %mul25, %or22
+  %shl27 = shl i64 0, 0
+  %mul28 = mul i64 %shl27, 0
+  %trunc29 = trunc i64 %mul28 to i32
+  %or30 = or i32 0, %trunc29
+  %or31 = or i32 0, %or30
+  %or32 = or i32 %or31, 0
+  %mul33 = mul i32 0, %or32
+  %mul34 = mul i32 %or32, %mul26
+  %mul35 = mul i32 %mul34, %mul33
+  %mul36 = mul i32 %mul35, %mul33
+  %mul37 = mul i32 %mul36, %or32
+  br label %bb1
+}


        


More information about the llvm-commits mailing list