[llvm] 6b1d137 - [SLP]Fix PR101213: Reuse extractelement, only if its vector operand comes before new vector value.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 16:02:55 PDT 2024
Author: Alexey Bataev
Date: 2024-07-30T16:02:46-07:00
New Revision: 6b1d13761ac0c9857763e5f4c0ae554f076dd9b7
URL: https://github.com/llvm/llvm-project/commit/6b1d13761ac0c9857763e5f4c0ae554f076dd9b7
DIFF: https://github.com/llvm/llvm-project/commit/6b1d13761ac0c9857763e5f4c0ae554f076dd9b7.diff
LOG: [SLP]Fix PR101213: Reuse extractelement, only if its vector operand comes before new vector value.
When trying to reuse an extractelement instruction, we need to check that it
is inserted at the proper position. Its original vector operand should
come before the new vector value; otherwise, a new extractelement instruction
must be generated.
Fixes https://github.com/llvm/llvm-project/issues/101213
Added:
llvm/test/Transforms/SLPVectorizer/X86/extract-vectorized-operand.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6ce1077d81cf5..ee8da68b28218 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13927,11 +13927,18 @@ Value *BoUpSLP::vectorizeTree(
}
if (!Ex) {
// "Reuse" the existing extract to improve final codegen.
- if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) {
+ if (auto *ES = dyn_cast<ExtractElementInst>(Scalar);
+ ES && isa<Instruction>(Vec)) {
Value *V = ES->getVectorOperand();
+ auto *IVec = cast<Instruction>(Vec);
if (const TreeEntry *ETE = getTreeEntry(V))
V = ETE->VectorizedValue;
- Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
+ if (auto *IV = dyn_cast<Instruction>(V);
+ !IV || IV == Vec || IV->getParent() != IVec->getParent() ||
+ IV->comesBefore(IVec))
+ Ex = Builder.CreateExtractElement(V, ES->getIndexOperand());
+ else
+ Ex = Builder.CreateExtractElement(Vec, Lane);
} else if (ReplaceGEP) {
// Leave the GEPs as is, they are free in most cases and better to
// keep them as GEPs.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-vectorized-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-vectorized-operand.ll
new file mode 100644
index 0000000000000..f1a5709d07f02
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-vectorized-operand.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: br label %[[BB43:.*]]
+; CHECK: [[BB20:.*]]:
+; CHECK-NEXT: br label %[[BB105:.*]]
+; CHECK: [[BB43]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x ptr addrspace(1)> [ [[TMP1:%.*]], %[[BB51:.*]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NEXT: br i1 false, label %[[BB105]], label %[[BB51]]
+; CHECK: [[BB51]]:
+; CHECK-NEXT: [[TMP1]] = phi <2 x ptr addrspace(1)> [ poison, %[[BB54:.*]] ], [ zeroinitializer, %[[BB43]] ]
+; CHECK-NEXT: br label %[[BB43]]
+; CHECK: [[BB54]]:
+; CHECK-NEXT: br label %[[BB51]]
+; CHECK: [[BB105]]:
+; CHECK-NEXT: [[PHI106:%.*]] = phi ptr addrspace(1) [ null, %[[BB20]] ], [ null, %[[BB43]] ]
+; CHECK-NEXT: ret void
+;
+bb:
+ %0 = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> zeroinitializer, <2 x i32> <i32 1, i32 0>
+ %1 = extractelement <2 x ptr addrspace(1)> %0, i32 0
+ %2 = extractelement <2 x ptr addrspace(1)> %0, i32 1
+ br label %bb43
+
+bb20:
+ br label %bb105
+
+bb43:
+ %phi441 = phi ptr addrspace(1) [ %4, %bb51 ], [ %2, %bb ]
+ %phi452 = phi ptr addrspace(1) [ %5, %bb51 ], [ %1, %bb ]
+ br i1 false, label %bb105, label %bb51
+
+bb51:
+ %3 = phi <2 x ptr addrspace(1)> [ poison, %bb54 ], [ zeroinitializer, %bb43 ]
+ %4 = extractelement <2 x ptr addrspace(1)> %3, i32 0
+ %5 = extractelement <2 x ptr addrspace(1)> %3, i32 1
+ br label %bb43
+
+bb54:
+ br label %bb51
+
+bb105:
+ %phi106 = phi ptr addrspace(1) [ %1, %bb20 ], [ null, %bb43 ]
+ ret void
+}
+
More information about the llvm-commits
mailing list