[llvm] 5f53e85 - [SLP]Fix a crash when trying to find reduced ops for the reduced value.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 27 07:32:47 PST 2023


Author: Alexey Bataev
Date: 2023-02-27T07:32:36-08:00
New Revision: 5f53e85f8aa92ff0cf7d674a92491e06683f73ef

URL: https://github.com/llvm/llvm-project/commit/5f53e85f8aa92ff0cf7d674a92491e06683f73ef
DIFF: https://github.com/llvm/llvm-project/commit/5f53e85f8aa92ff0cf7d674a92491e06683f73ef.diff

LOG: [SLP]Fix a crash when trying to find reduced ops for the reduced value.

When looking up the reduced ops for a reduced value, use the original reduced
value rather than the value obtained after reduction, because the latter may
already have been replaced by an extractelement instruction.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/reduction-extracted-value.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ae0396357e065..ab4cfb118bf04 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13043,7 +13043,7 @@ class HorizontalReduction {
           Value *OrigV = TrackedToOrig.find(RdxVal)->second;
           unsigned NumOps =
               VectorizedVals.lookup(RdxVal) + SameValuesCounter[OrigV];
-          if (NumOps != ReducedValsToOps.find(RdxVal)->second.size())
+          if (NumOps != ReducedValsToOps.find(OrigV)->second.size())
             LocalExternallyUsedValues[RdxVal];
         }
         // Do not need the list of reused scalars in regular mode anymore.

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-extracted-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-extracted-value.ll
new file mode 100644
index 0000000000000..a8ba9e059dc2e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-extracted-value.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s
+
+define i32 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i32> zeroinitializer, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i32> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = mul i32 0, [[TMP5]]
+; CHECK-NEXT:    [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], 0
+; CHECK-NEXT:    [[OP_RDX2:%.*]] = mul i32 [[TMP0]], [[TMP0]]
+; CHECK-NEXT:    [[OP_RDX3:%.*]] = mul i32 [[TMP0]], [[TMP0]]
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]]
+; CHECK-NEXT:    [[OP_RDX5:%.*]] = mul i32 [[OP_RDX3]], [[TMP2]]
+; CHECK-NEXT:    [[OP_RDX6:%.*]] = mul i32 [[OP_RDX4]], [[OP_RDX5]]
+; CHECK-NEXT:    ret i32 [[OP_RDX6]]
+;
+bb:
+  %inst5 = add i32 0, 0
+  %0 = extractelement <2 x i32> zeroinitializer, i32 0
+  %inst7 = mul i32 %0, %inst5
+  %1 = extractelement <2 x i32> zeroinitializer, i32 0
+  %inst13 = mul i32 %1, %inst7
+  %inst14 = mul i32 %inst13, 0
+  %2 = extractelement <2 x i32> zeroinitializer, i32 0
+  %inst19 = mul i32 %2, %inst14
+  %inst20 = mul i32 %inst19, 0
+  %3 = extractelement <2 x i32> zeroinitializer, i32 0
+  %inst26 = mul i32 %3, %inst20
+  %inst27 = mul i32 %inst26, 0
+  %4 = or <4 x i32> zeroinitializer, zeroinitializer
+  %5 = extractelement <4 x i32> %4, i32 0
+  %inst31 = mul i32 %5, 0
+  %inst32 = add i32 %inst31, 0
+  %inst33 = mul i32 %5, %inst27
+  %inst34 = mul i32 %inst33, %inst32
+  %6 = extractelement <4 x i32> %4, i32 1
+  %inst39 = mul i32 %6, 0
+  %inst40 = add i32 %inst39, 0
+  %inst41 = mul i32 0, %inst34
+  %inst42 = mul i32 %inst41, %inst40
+  %7 = extractelement <4 x i32> %4, i32 2
+  %inst47 = mul i32 %7, 0
+  %inst48 = add i32 %inst47, 0
+  %inst49 = mul i32 0, %inst42
+  %inst50 = mul i32 %inst49, %inst48
+  %8 = extractelement <4 x i32> %4, i32 3
+  %inst55 = mul i32 %8, 0
+  %inst56 = add i32 %inst55, 0
+  %inst57 = mul i32 0, %inst50
+  %inst58 = mul i32 %inst57, %inst56
+  ret i32 %inst58
+}
+


        


More information about the llvm-commits mailing list