[llvm] [SLP] Improve gather tree nodes matching when users are PHIs. (PR #70111)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 24 13:07:50 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Valery Dmitriev (valerydmit)

<details>
<summary>Changes</summary>

This is re-commit of #<!-- -->69392 and also fixes issue #<!-- -->69670 which was uncovered with the prior commit.
For delayed gather emission it may be incorrect to use stab instruction as insertion point if it is a PHI operand. For that case insertion point is adjusted to be at the end of block, ensuring that prior dependecy vector code is emitted earlier.

---
Full diff: https://github.com/llvm/llvm-project/pull/70111.diff


3 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+17-4) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll (+2-3) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8fbd4cea04c03a0..3882fe633363b61 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9059,6 +9059,7 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
   // blocks.
   if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp())) {
     TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
+    TEInsertPt = TEInsertBlock->getTerminator();
   } else {
     TEInsertBlock = TEInsertPt->getParent();
   }
@@ -9122,9 +9123,10 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
       const Instruction *InsertPt =
           UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
                   : &getLastInstructionInBundle(UseEI.UserTE);
-      if (!UserPHI && TEInsertPt == InsertPt) {
-        // If 2 gathers are operands of the same non-PHI entry,
-        // compare operands indices, use the earlier one as the base.
+      if (TEInsertPt == InsertPt) {
+        // If 2 gathers are operands of the same entry (regardless of wether
+        // user is PHI or else), compare operands indices, use the earlier one
+        // as the base.
         if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx)
           continue;
         // If the user instruction is used for some reason in different
@@ -11250,7 +11252,18 @@ Value *BoUpSLP::vectorizeTree(
     TE->VectorizedValue = nullptr;
     auto *UserI =
         cast<Instruction>(TE->UserTreeIndices.front().UserTE->VectorizedValue);
-    Builder.SetInsertPoint(PrevVec);
+    // If user is a PHI node, its vector code have to be inserted right before
+    // block terminator. Since the node was delayed, there were some unresolved
+    // dependencies at the moment when stab instruction was emitted. In a case
+    // when any of these dependencies turn out an operand of another PHI, coming
+    // from this same block, position of a stab instruction will become invalid.
+    // The is because source vector that supposed to feed this gather node was
+    // inserted at the end of the block [after stab instruction]. So we need
+    // to adjust insertion point again to the end of block.
+    if (isa<PHINode>(UserI))
+      Builder.SetInsertPoint(PrevVec->getParent()->getTerminator());
+    else
+      Builder.SetInsertPoint(PrevVec);
     Builder.SetCurrentDebugLocation(UserI->getDebugLoc());
     Value *Vec = vectorizeTree(TE, /*PostponedPHIs=*/false);
     PrevVec->replaceAllUsesWith(Vec);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
index 8860cd4d8d141e3..1cef1032bf5d9e9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
@@ -15,7 +15,7 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> <float poison, float undef>, float [[DOTPRE_PRE]], i32 0
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[BB2:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[BB2:%.*]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP9:%.*]], [[BB2]] ]
@@ -29,9 +29,8 @@ define void @test() {
 ; CHECK-NEXT:    tail call void @foo(float [[MUL]])
 ; CHECK-NEXT:    [[I2:%.*]] = load float, ptr poison, align 4
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> poison, float [[I2]], i32 0
+; CHECK-NEXT:    [[TMP8]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
 ; CHECK-NEXT:    [[TMP9]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP10]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index 28e0b06f6967368..e5d7ad138b4def2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -8,7 +8,7 @@
 ; YAML: Function:        test
 ; YAML: Args:
 ; YAML:   - String:          'Stores SLP vectorized with cost '
-; YAML:   - Cost:            '-3'
+; YAML:   - Cost:            '-6'
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '14'
 ; YAML: ...

``````````

</details>


https://github.com/llvm/llvm-project/pull/70111


More information about the llvm-commits mailing list