[llvm] 4dd186a - [SLP]Fix PR85082: PHI node has multiple entries.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 13 14:00:40 PDT 2024
Author: Alexey Bataev
Date: 2024-03-13T13:59:58-07:00
New Revision: 4dd186afd502e1e56b8f3d6d923b7f8cfa124572
URL: https://github.com/llvm/llvm-project/commit/4dd186afd502e1e56b8f3d6d923b7f8cfa124572
DIFF: https://github.com/llvm/llvm-project/commit/4dd186afd502e1e56b8f3d6d923b7f8cfa124572.diff
LOG: [SLP]Fix PR85082: PHI node has multiple entries.
Need to record the casted extractelement for the externally used scalar,
not the original extract instruction.
Added:
llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b8b67609d755fd..739dae3bdd0cff 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12539,7 +12539,9 @@ Value *BoUpSLP::vectorizeTree(
DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
// Maps extract Scalar to the corresponding extractelement instruction in the
// basic block. Only one extractelement per block should be emitted.
- DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
+ DenseMap<Value *,
+ DenseMap<BasicBlock *, std::pair<Instruction *, Instruction *>>>
+ ScalarToEEs;
SmallDenseSet<Value *, 4> UsedInserts;
DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
@@ -12568,18 +12570,23 @@ Value *BoUpSLP::vectorizeTree(
auto ExtractAndExtendIfNeeded = [&](Value *Vec) {
if (Scalar->getType() != Vec->getType()) {
Value *Ex = nullptr;
+ Value *ExV = nullptr;
auto It = ScalarToEEs.find(Scalar);
if (It != ScalarToEEs.end()) {
// No need to emit many extracts, just move the only one in the
// current block.
auto EEIt = It->second.find(Builder.GetInsertBlock());
if (EEIt != It->second.end()) {
- Instruction *I = EEIt->second;
+ Instruction *I = EEIt->second.first;
if (Builder.GetInsertPoint() != Builder.GetInsertBlock()->end() &&
- Builder.GetInsertPoint()->comesBefore(I))
+ Builder.GetInsertPoint()->comesBefore(I)) {
I->moveBefore(*Builder.GetInsertPoint()->getParent(),
Builder.GetInsertPoint());
+ if (auto *CI = EEIt->second.second)
+ CI->moveAfter(I);
+ }
Ex = I;
+ ExV = EEIt->second.second ? EEIt->second.second : Ex;
}
}
if (!Ex) {
@@ -12592,8 +12599,16 @@ Value *BoUpSLP::vectorizeTree(
} else {
Ex = Builder.CreateExtractElement(Vec, Lane);
}
+ // If necessary, sign-extend or zero-extend ScalarRoot
+ // to the larger type.
+ ExV = Ex;
+ if (Scalar->getType() != Ex->getType())
+ ExV = Builder.CreateIntCast(Ex, Scalar->getType(),
+ MinBWs.find(E)->second.second);
if (auto *I = dyn_cast<Instruction>(Ex))
- ScalarToEEs[Scalar].try_emplace(Builder.GetInsertBlock(), I);
+ ScalarToEEs[Scalar].try_emplace(
+ Builder.GetInsertBlock(),
+ std::make_pair(I, cast<Instruction>(ExV)));
}
// The then branch of the previous if may produce constants, since 0
// operand might be a constant.
@@ -12601,12 +12616,7 @@ Value *BoUpSLP::vectorizeTree(
GatherShuffleExtractSeq.insert(ExI);
CSEBlocks.insert(ExI->getParent());
}
- // If necessary, sign-extend or zero-extend ScalarRoot
- // to the larger type.
- if (Scalar->getType() != Ex->getType())
- return Builder.CreateIntCast(Ex, Scalar->getType(),
- MinBWs.find(E)->second.second);
- return Ex;
+ return ExV;
}
assert(isa<FixedVectorType>(Scalar->getType()) &&
isa<InsertElementInst>(Scalar) &&
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll
new file mode 100644
index 00000000000000..35f2f9e052e749
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(i32 %arg) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: switch i32 0, label [[BB10:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB9:%.*]]
+; CHECK-NEXT: i32 11, label [[BB9]]
+; CHECK-NEXT: i32 1, label [[BB4:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: switch i32 0, label [[BB10]] [
+; CHECK-NEXT: i32 18, label [[BB7:%.*]]
+; CHECK-NEXT: i32 1, label [[BB7]]
+; CHECK-NEXT: i32 0, label [[BB10]]
+; CHECK-NEXT: ]
+; CHECK: bb4:
+; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i32> [ [[TMP0]], [[BB2]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr null, i64 [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; CHECK-NEXT: [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr null, i64 [[TMP6]]
+; CHECK-NEXT: ret void
+; CHECK: bb7:
+; CHECK-NEXT: [[PHI8:%.*]] = phi i64 [ [[TMP2]], [[BB3:%.*]] ], [ [[TMP2]], [[BB3]] ]
+; CHECK-NEXT: br label [[BB9]]
+; CHECK: bb9:
+; CHECK-NEXT: ret void
+; CHECK: bb10:
+; CHECK-NEXT: ret void
+;
+bb:
+ %zext = zext i32 %arg to i64
+ %zext1 = zext i32 0 to i64
+ br label %bb2
+
+bb2:
+ switch i32 0, label %bb10 [
+ i32 0, label %bb9
+ i32 11, label %bb9
+ i32 1, label %bb4
+ ]
+
+bb3:
+ switch i32 0, label %bb10 [
+ i32 18, label %bb7
+ i32 1, label %bb7
+ i32 0, label %bb10
+ ]
+
+bb4:
+ %phi = phi i64 [ %zext, %bb2 ]
+ %phi5 = phi i64 [ %zext1, %bb2 ]
+ %getelementptr = getelementptr i32, ptr null, i64 %phi
+ %getelementptr6 = getelementptr i32, ptr null, i64 %phi5
+ ret void
+
+bb7:
+ %phi8 = phi i64 [ %zext, %bb3 ], [ %zext, %bb3 ]
+ br label %bb9
+
+bb9:
+ ret void
+
+bb10:
+ ret void
+}
More information about the llvm-commits mailing list