[llvm] 98bb354 - [SLP]Fix PR107037: correctly track original/modified after vectorizations reduced values

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 3 15:51:52 PDT 2024


Author: Alexey Bataev
Date: 2024-09-03T15:49:19-07:00
New Revision: 98bb354a0add4aeb614430f48a23f87992166239

URL: https://github.com/llvm/llvm-project/commit/98bb354a0add4aeb614430f48a23f87992166239
DIFF: https://github.com/llvm/llvm-project/commit/98bb354a0add4aeb614430f48a23f87992166239.diff

LOG: [SLP]Fix PR107037: correctly track original/modified after vectorizations reduced values

We need to correctly track reduced values that have multiple uses within the
same reduction emission attempt. Otherwise, the number of reuses might be
calculated incorrectly, which may cause a compiler crash.

Fixes https://github.com/llvm/llvm-project/issues/107037

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cf802034cd56a3..5ac2e0c5586bbe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17778,10 +17778,12 @@ class HorizontalReduction {
       // Emit code for constant values.
       if (Candidates.size() > 1 && allConstant(Candidates)) {
         Value *Res = Candidates.front();
-        ++VectorizedVals.try_emplace(Candidates.front(), 0).first->getSecond();
+        Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
+        ++VectorizedVals.try_emplace(OrigV).first->getSecond();
         for (Value *VC : ArrayRef(Candidates).drop_front()) {
           Res = createOp(Builder, RdxKind, Res, VC, "const.rdx", ReductionOps);
-          ++VectorizedVals.try_emplace(VC, 0).first->getSecond();
+          Value *OrigV = TrackedToOrig.find(VC)->second;
+          ++VectorizedVals.try_emplace(OrigV).first->getSecond();
           if (auto *ResI = dyn_cast<Instruction>(Res))
             V.analyzedReductionRoot(ResI);
         }
@@ -17802,8 +17804,10 @@ class HorizontalReduction {
       // Gather same values.
       MapVector<Value *, unsigned> SameValuesCounter;
       if (IsSupportedHorRdxIdentityOp)
-        for (Value *V : Candidates)
-          ++SameValuesCounter.insert(std::make_pair(V, 0)).first->second;
+        for (Value *V : Candidates) {
+          Value *OrigV = TrackedToOrig.find(V)->second;
+          ++SameValuesCounter.try_emplace(OrigV).first->second;
+        }
       // Used to check if the reduced values used same number of times. In this
       // case the compiler may produce better code. E.g. if reduced values are
       // aabbccdd (8 x values), then the first node of the tree will have a node
@@ -17827,12 +17831,12 @@ class HorizontalReduction {
                    });
         Candidates.resize(SameValuesCounter.size());
         transform(SameValuesCounter, Candidates.begin(),
-                  [](const auto &P) { return P.first; });
+                  [&](const auto &P) { return TrackedVals.at(P.first); });
         NumReducedVals = Candidates.size();
         // Have a reduction of the same element.
         if (NumReducedVals == 1) {
           Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
-          unsigned Cnt = SameValuesCounter.lookup(OrigV);
+          unsigned Cnt = SameValuesCounter.find(OrigV)->second;
           Value *RedVal =
               emitScaleForReusedOps(Candidates.front(), Builder, Cnt);
           VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
@@ -17937,7 +17941,7 @@ class HorizontalReduction {
               continue;
             Value *V = Candidates[Cnt];
             Value *OrigV = TrackedToOrig.find(V)->second;
-            ++SameValuesCounter[OrigV];
+            ++SameValuesCounter.find(OrigV)->second;
           }
         }
         SmallPtrSet<Value *, 4> VLScalars(VL.begin(), VL.end());
@@ -17956,8 +17960,8 @@ class HorizontalReduction {
             continue;
           }
           Value *OrigV = TrackedToOrig.find(RdxVal)->second;
-          unsigned NumOps =
-              VectorizedVals.lookup(RdxVal) + SameValuesCounter[OrigV];
+          unsigned NumOps = VectorizedVals.lookup(OrigV) +
+                            SameValuesCounter.find(OrigV)->second;
           if (NumOps != ReducedValsToOps.find(OrigV)->second.size())
             LocalExternallyUsedValues[RdxVal];
         }
@@ -18085,10 +18089,11 @@ class HorizontalReduction {
         for (Value *RdxVal : VL) {
           Value *OrigV = TrackedToOrig.find(RdxVal)->second;
           if (IsSupportedHorRdxIdentityOp) {
-            VectorizedVals.try_emplace(OrigV, SameValuesCounter[RdxVal]);
+            VectorizedVals.try_emplace(OrigV,
+                                       SameValuesCounter.find(OrigV)->second);
             continue;
           }
-          ++VectorizedVals.try_emplace(OrigV, 0).first->getSecond();
+          ++VectorizedVals.try_emplace(OrigV).first->getSecond();
           if (!V.isVectorized(RdxVal))
             RequiredExtract.insert(RdxVal);
         }
@@ -18099,10 +18104,10 @@ class HorizontalReduction {
       }
       if (OptReusedScalars && !AnyVectorized) {
         for (const std::pair<Value *, unsigned> &P : SameValuesCounter) {
-          Value *RedVal = emitScaleForReusedOps(P.first, Builder, P.second);
+          Value *RdxVal = TrackedVals.find(P.first)->second;
+          Value *RedVal = emitScaleForReusedOps(RdxVal, Builder, P.second);
           VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
-          Value *OrigV = TrackedToOrig.find(P.first)->second;
-          VectorizedVals.try_emplace(OrigV, P.second);
+          VectorizedVals.try_emplace(P.first, P.second);
         }
         continue;
       }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
new file mode 100644
index 00000000000000..e012cc60960b3c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-tracked-reduced-value.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i8 @test() {
+; CHECK-LABEL: define i8 @test() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 0 to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 0 to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 0 to i8
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 0 to i8
+; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> zeroinitializer)
+; CHECK-NEXT:    [[OP_RDX:%.*]] = or i8 [[TMP4]], [[TMP0]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i8 [[OP_RDX]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = or i8 [[OP_RDX1]], [[TMP0]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i8 [[OP_RDX2]], [[TMP1]]
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = or i8 [[OP_RDX3]], [[TMP3]]
+; CHECK-NEXT:    ret i8 [[OP_RDX4]]
+;
+entry:
+  %0 = trunc i32 0 to i8
+  %1 = add i8 %0, 0
+  %2 = add i8 %0, 0
+  %3 = add i8 %0, 0
+  %4 = add i8 %0, 0
+  %5 = trunc i32 0 to i8
+  %6 = or i8 %5, %0
+  %7 = or i8 %6, %2
+  %8 = or i8 %7, %3
+  %9 = or i8 %8, %0
+  %10 = or i8 %9, %4
+  %conv4 = or i8 %10, %1
+  %11 = trunc i32 0 to i8
+  %12 = add i8 %11, 0
+  %conv7 = or i8 %conv4, %12
+  %13 = add i8 %11, 0
+  %14 = add i8 %11, 0
+  %15 = add i8 %11, 0
+  %16 = trunc i32 0 to i8
+  %17 = or i8 %13, %16
+  %18 = or i8 %17, %14
+  %19 = or i8 %18, %11
+  %20 = or i8 %19, %15
+  %conv5 = or i8 %20, %conv7
+  %21 = trunc i32 0 to i8
+  %conv6 = or i8 %21, %conv5
+  ret i8 %conv6
+}


        


More information about the llvm-commits mailing list