[llvm] [SLP] Improve gather tree nodes matching when users are PHIs. (PR #70111)
Valery Dmitriev via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 13:06:10 PDT 2023
https://github.com/valerydmit created https://github.com/llvm/llvm-project/pull/70111
This is a re-commit of #69392 that also fixes issue #69670, which was uncovered by the prior commit.
For delayed gather emission it may be incorrect to use the stub instruction as the insertion point if it is a PHI operand. In that case the insertion point is adjusted to be at the end of the block, ensuring that the prior dependency vector code is emitted earlier.
>From f324d4e8dcdde29c36d8e0f7f8a6481c37ef151e Mon Sep 17 00:00:00 2001
From: Valery Dmitriev <valery.n.dmitriev at intel.com>
Date: Wed, 18 Oct 2023 09:05:11 -0700
Subject: [PATCH] [SLP] Improve gather tree nodes matching when users are PHIs.
This is a re-commit of #69392 that also fixes issue #69670,
which was uncovered by the prior commit.
For delayed gather emission it may be incorrect to use the stub
instruction as the insertion point if it is a PHI operand.
In that case the insertion point is adjusted to be at the end of the block,
ensuring that the prior dependency vector code is emitted earlier.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 21 +++++++++++++++----
.../X86/delayed-gather-emission.ll | 5 ++---
.../X86/matching-gather-nodes-phi-users.ll | 2 +-
3 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8fbd4cea04c03a0..3882fe633363b61 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9059,6 +9059,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
// blocks.
if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp())) {
TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
+ TEInsertPt = TEInsertBlock->getTerminator();
} else {
TEInsertBlock = TEInsertPt->getParent();
}
@@ -9122,9 +9123,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
const Instruction *InsertPt =
UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
: &getLastInstructionInBundle(UseEI.UserTE);
- if (!UserPHI && TEInsertPt == InsertPt) {
- // If 2 gathers are operands of the same non-PHI entry,
- // compare operands indices, use the earlier one as the base.
+ if (TEInsertPt == InsertPt) {
+ // If 2 gathers are operands of the same entry (regardless of wether
+ // user is PHI or else), compare operands indices, use the earlier one
+ // as the base.
if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx)
continue;
// If the user instruction is used for some reason in different
@@ -11250,7 +11252,18 @@ Value *BoUpSLP::vectorizeTree(
TE->VectorizedValue = nullptr;
auto *UserI =
cast<Instruction>(TE->UserTreeIndices.front().UserTE->VectorizedValue);
- Builder.SetInsertPoint(PrevVec);
+ // If user is a PHI node, its vector code have to be inserted right before
+ // block terminator. Since the node was delayed, there were some unresolved
+ // dependencies at the moment when stab instruction was emitted. In a case
+ // when any of these dependencies turn out an operand of another PHI, coming
+ // from this same block, position of a stab instruction will become invalid.
+ // The is because source vector that supposed to feed this gather node was
+ // inserted at the end of the block [after stab instruction]. So we need
+ // to adjust insertion point again to the end of block.
+ if (isa<PHINode>(UserI))
+ Builder.SetInsertPoint(PrevVec->getParent()->getTerminator());
+ else
+ Builder.SetInsertPoint(PrevVec);
Builder.SetCurrentDebugLocation(UserI->getDebugLoc());
Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false);
PrevVec->replaceAllUsesWith(Vec);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
index 8860cd4d8d141e3..1cef1032bf5d9e9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
@@ -15,7 +15,7 @@ define void @test() {
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> <float poison, float undef>, float [[DOTPRE_PRE]], i32 0
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
-; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[BB2:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[BB2:%.*]] ]
; CHECK-NEXT: br label [[BB2]]
; CHECK: bb2:
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP9:%.*]], [[BB2]] ]
@@ -29,9 +29,8 @@ define void @test() {
; CHECK-NEXT: tail call void @foo(float [[MUL]])
; CHECK-NEXT: [[I2:%.*]] = load float, ptr poison, align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[I2]], i32 0
+; CHECK-NEXT: [[TMP8]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
; CHECK-NEXT: [[TMP9]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index 28e0b06f6967368..e5d7ad138b4def2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -8,7 +8,7 @@
; YAML: Function: test
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '-3'
+; YAML: - Cost: '-6'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '14'
; YAML: ...
More information about the llvm-commits
mailing list