[llvm] [SLP] Improve gather tree nodes matching when users are PHIs. (PR #70111)

Valery Dmitriev via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 24 13:06:10 PDT 2023


https://github.com/valerydmit created https://github.com/llvm/llvm-project/pull/70111

This is re-commit of #69392 and also fixes issue #69670 which was uncovered with the prior commit.
For delayed gather emission it may be incorrect to use stab instruction as insertion point if it is a PHI operand. For that case insertion point is adjusted to be at the end of block, ensuring that prior dependecy vector code is emitted earlier.

>From f324d4e8dcdde29c36d8e0f7f8a6481c37ef151e Mon Sep 17 00:00:00 2001
From: Valery Dmitriev <valery.n.dmitriev at intel.com>
Date: Wed, 18 Oct 2023 09:05:11 -0700
Subject: [PATCH] [SLP] Improve gather tree nodes matching when users are PHIs.

This is re-commit of #69392 and also fixes issue #69670
which was uncovered with the prior commit.
For delayed gather emission it may be incorrect to use stab
instruction as insertion point if it is a PHI operand.
For that case insertion point is adjusted to be at the end of block,
ensuring that prior dependecy vector code is emitted earlier.
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 21 +++++++++++++++----
 .../X86/delayed-gather-emission.ll            |  5 ++---
 .../X86/matching-gather-nodes-phi-users.ll    |  2 +-
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8fbd4cea04c03a0..3882fe633363b61 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9059,6 +9059,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
   // blocks.
   if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp())) {
     TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
+    TEInsertPt = TEInsertBlock->getTerminator();
   } else {
     TEInsertBlock = TEInsertPt->getParent();
   }
@@ -9122,9 +9123,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
       const Instruction *InsertPt =
           UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
                   : &getLastInstructionInBundle(UseEI.UserTE);
-      if (!UserPHI && TEInsertPt == InsertPt) {
-        // If 2 gathers are operands of the same non-PHI entry,
-        // compare operands indices, use the earlier one as the base.
+      if (TEInsertPt == InsertPt) {
+        // If 2 gathers are operands of the same entry (regardless of wether
+        // user is PHI or else), compare operands indices, use the earlier one
+        // as the base.
         if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx)
           continue;
         // If the user instruction is used for some reason in different
@@ -11250,7 +11252,18 @@ Value *BoUpSLP::vectorizeTree(
     TE->VectorizedValue = nullptr;
     auto *UserI =
         cast<Instruction>(TE->UserTreeIndices.front().UserTE->VectorizedValue);
-    Builder.SetInsertPoint(PrevVec);
+    // If user is a PHI node, its vector code have to be inserted right before
+    // block terminator. Since the node was delayed, there were some unresolved
+    // dependencies at the moment when stab instruction was emitted. In a case
+    // when any of these dependencies turn out an operand of another PHI, coming
+    // from this same block, position of a stab instruction will become invalid.
+    // The is because source vector that supposed to feed this gather node was
+    // inserted at the end of the block [after stab instruction]. So we need
+    // to adjust insertion point again to the end of block.
+    if (isa<PHINode>(UserI))
+      Builder.SetInsertPoint(PrevVec->getParent()->getTerminator());
+    else
+      Builder.SetInsertPoint(PrevVec);
     Builder.SetCurrentDebugLocation(UserI->getDebugLoc());
     Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false);
     PrevVec->replaceAllUsesWith(Vec);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
index 8860cd4d8d141e3..1cef1032bf5d9e9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
@@ -15,7 +15,7 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> <float poison, float undef>, float [[DOTPRE_PRE]], i32 0
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[BB2:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[BB2:%.*]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP9:%.*]], [[BB2]] ]
@@ -29,9 +29,8 @@ define void @test() {
 ; CHECK-NEXT:    tail call void @foo(float [[MUL]])
 ; CHECK-NEXT:    [[I2:%.*]] = load float, ptr poison, align 4
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> poison, float [[I2]], i32 0
+; CHECK-NEXT:    [[TMP8]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
 ; CHECK-NEXT:    [[TMP9]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP10]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index 28e0b06f6967368..e5d7ad138b4def2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -8,7 +8,7 @@
 ; YAML: Function:        test
 ; YAML: Args:
 ; YAML:   - String:          'Stores SLP vectorized with cost '
-; YAML:   - Cost:            '-3'
+; YAML:   - Cost:            '-6'
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '14'
 ; YAML: ...



More information about the llvm-commits mailing list