[llvm] 9a7248f - [SLP]Fix crash for scalarized vectors.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed May 17 06:34:08 PDT 2023
Author: Alexey Bataev
Date: 2023-05-17T06:32:22-07:00
New Revision: 9a7248f56164b44b07df421384c12541a91e6d84
URL: https://github.com/llvm/llvm-project/commit/9a7248f56164b44b07df421384c12541a91e6d84
DIFF: https://github.com/llvm/llvm-project/commit/9a7248f56164b44b07df421384c12541a91e6d84.diff
LOG: [SLP]Fix crash for scalarized vectors.
The insertion of nodes into InVectors must also be removed for scalarized
vectors, to avoid compiler crashes.
Added:
llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1fceae5f26106..fd4fee09c9e00 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7028,10 +7028,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
// If the resulting type is scalarized, do not adjust the cost.
unsigned VecNumParts = TTI.getNumberOfParts(VecTy);
- if (VecNumParts == VecTy->getNumElements()) {
- InVectors.assign(1, E);
+ if (VecNumParts == VecTy->getNumElements())
return nullptr;
- }
DenseMap<Value *, int> ExtractVectorsTys;
for (auto [I, V] : enumerate(VL)) {
// Ignore non-extractelement scalars.
diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll
new file mode 100644
index 0000000000000..e1c3d9affd182
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/buildvector-scalarized.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=slp-vectorizer -S -mtriple=nvptx64-unknown-unknown < %s | FileCheck %s
+
+define <2 x float> @baz() {
+; CHECK-LABEL: define <2 x float> @baz() {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <2 x float> zeroinitializer, i64 0
+; CHECK-NEXT: [[FCMP:%.*]] = fcmp uno float [[EXTRACTELEMENT]], 0.000000e+00
+; CHECK-NEXT: [[FCMP1:%.*]] = fcmp uno float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[FCMP]], [[FCMP1]]
+; CHECK-NEXT: [[FCMP2:%.*]] = fcmp oeq float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[OR3:%.*]] = or i1 [[FCMP2]], [[OR]]
+; CHECK-NEXT: [[FCMP4:%.*]] = fcmp oeq float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[OR5:%.*]] = or i1 [[FCMP4]], [[OR3]]
+; CHECK-NEXT: br i1 [[OR5]], label [[BB6:%.*]], label [[BB7:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: ret <2 x float> zeroinitializer
+; CHECK: bb7:
+; CHECK-NEXT: ret <2 x float> zeroinitializer
+;
+bb:
+ %extractelement = extractelement <2 x float> zeroinitializer, i64 0
+ %fcmp = fcmp uno float %extractelement, 0.000000e+00
+ %fcmp1 = fcmp uno float 0.000000e+00, 0.000000e+00
+ %or = or i1 %fcmp, %fcmp1
+ %fcmp2 = fcmp oeq float 0.000000e+00, 0.000000e+00
+ %or3 = or i1 %fcmp2, %or
+ %fcmp4 = fcmp oeq float 0.000000e+00, 0.000000e+00
+ %or5 = or i1 %fcmp4, %or3
+ br i1 %or5, label %bb6, label %bb7
+
+bb6: ; preds = %bb
+ ret <2 x float> zeroinitializer
+
+bb7: ; preds = %bb
+ ret <2 x float> zeroinitializer
+}
More information about the llvm-commits
mailing list