[llvm] 4dd186a - [SLP]Fix PR85082: PHI node has multiple entries.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 13 14:00:40 PDT 2024


Author: Alexey Bataev
Date: 2024-03-13T13:59:58-07:00
New Revision: 4dd186afd502e1e56b8f3d6d923b7f8cfa124572

URL: https://github.com/llvm/llvm-project/commit/4dd186afd502e1e56b8f3d6d923b7f8cfa124572
DIFF: https://github.com/llvm/llvm-project/commit/4dd186afd502e1e56b8f3d6d923b7f8cfa124572.diff

LOG: [SLP]Fix PR85082: PHI node has multiple entries.

Need to record the casted extractelement for the externally used scalar, not
the original extract instruction.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b8b67609d755fd..739dae3bdd0cff 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12539,7 +12539,9 @@ Value *BoUpSLP::vectorizeTree(
   DenseMap<Value *, InsertElementInst *> VectorToInsertElement;
   // Maps extract Scalar to the corresponding extractelement instruction in the
   // basic block. Only one extractelement per block should be emitted.
-  DenseMap<Value *, DenseMap<BasicBlock *, Instruction *>> ScalarToEEs;
+  DenseMap<Value *,
+           DenseMap<BasicBlock *, std::pair<Instruction *, Instruction *>>>
+      ScalarToEEs;
   SmallDenseSet<Value *, 4> UsedInserts;
   DenseMap<std::pair<Value *, Type *>, Value *> VectorCasts;
   SmallDenseSet<Value *, 4> ScalarsWithNullptrUser;
@@ -12568,18 +12570,23 @@ Value *BoUpSLP::vectorizeTree(
     auto ExtractAndExtendIfNeeded = [&](Value *Vec) {
       if (Scalar->getType() != Vec->getType()) {
         Value *Ex = nullptr;
+        Value *ExV = nullptr;
         auto It = ScalarToEEs.find(Scalar);
         if (It != ScalarToEEs.end()) {
           // No need to emit many extracts, just move the only one in the
           // current block.
           auto EEIt = It->second.find(Builder.GetInsertBlock());
           if (EEIt != It->second.end()) {
-            Instruction *I = EEIt->second;
+            Instruction *I = EEIt->second.first;
             if (Builder.GetInsertPoint() != Builder.GetInsertBlock()->end() &&
-                Builder.GetInsertPoint()->comesBefore(I))
+                Builder.GetInsertPoint()->comesBefore(I)) {
               I->moveBefore(*Builder.GetInsertPoint()->getParent(),
                             Builder.GetInsertPoint());
+              if (auto *CI = EEIt->second.second)
+                CI->moveAfter(I);
+            }
             Ex = I;
+            ExV = EEIt->second.second ? EEIt->second.second : Ex;
           }
         }
         if (!Ex) {
@@ -12592,8 +12599,16 @@ Value *BoUpSLP::vectorizeTree(
           } else {
             Ex = Builder.CreateExtractElement(Vec, Lane);
           }
+          // If necessary, sign-extend or zero-extend ScalarRoot
+          // to the larger type.
+          ExV = Ex;
+          if (Scalar->getType() != Ex->getType())
+            ExV = Builder.CreateIntCast(Ex, Scalar->getType(),
+                                        MinBWs.find(E)->second.second);
           if (auto *I = dyn_cast<Instruction>(Ex))
-            ScalarToEEs[Scalar].try_emplace(Builder.GetInsertBlock(), I);
+            ScalarToEEs[Scalar].try_emplace(
+                Builder.GetInsertBlock(),
+                std::make_pair(I, cast<Instruction>(ExV)));
         }
         // The then branch of the previous if may produce constants, since 0
         // operand might be a constant.
@@ -12601,12 +12616,7 @@ Value *BoUpSLP::vectorizeTree(
           GatherShuffleExtractSeq.insert(ExI);
           CSEBlocks.insert(ExI->getParent());
         }
-        // If necessary, sign-extend or zero-extend ScalarRoot
-        // to the larger type.
-        if (Scalar->getType() != Ex->getType())
-          return Builder.CreateIntCast(Ex, Scalar->getType(),
-                                       MinBWs.find(E)->second.second);
-        return Ex;
+        return ExV;
       }
       assert(isa<FixedVectorType>(Scalar->getType()) &&
              isa<InsertElementInst>(Scalar) &&

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll
new file mode 100644
index 00000000000000..35f2f9e052e749
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/same-scalar-in-same-phi-extract.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(i32 %arg) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    switch i32 0, label [[BB10:%.*]] [
+; CHECK-NEXT:      i32 0, label [[BB9:%.*]]
+; CHECK-NEXT:      i32 11, label [[BB9]]
+; CHECK-NEXT:      i32 1, label [[BB4:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       bb3:
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    switch i32 0, label [[BB10]] [
+; CHECK-NEXT:      i32 18, label [[BB7:%.*]]
+; CHECK-NEXT:      i32 1, label [[BB7]]
+; CHECK-NEXT:      i32 0, label [[BB10]]
+; CHECK-NEXT:    ]
+; CHECK:       bb4:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x i32> [ [[TMP0]], [[BB2]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr null, i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
+; CHECK-NEXT:    [[GETELEMENTPTR6:%.*]] = getelementptr i32, ptr null, i64 [[TMP6]]
+; CHECK-NEXT:    ret void
+; CHECK:       bb7:
+; CHECK-NEXT:    [[PHI8:%.*]] = phi i64 [ [[TMP2]], [[BB3:%.*]] ], [ [[TMP2]], [[BB3]] ]
+; CHECK-NEXT:    br label [[BB9]]
+; CHECK:       bb9:
+; CHECK-NEXT:    ret void
+; CHECK:       bb10:
+; CHECK-NEXT:    ret void
+;
+bb:
+  %zext = zext i32 %arg to i64
+  %zext1 = zext i32 0 to i64
+  br label %bb2
+
+bb2:
+  switch i32 0, label %bb10 [
+  i32 0, label %bb9
+  i32 11, label %bb9
+  i32 1, label %bb4
+  ]
+
+bb3:
+  switch i32 0, label %bb10 [
+  i32 18, label %bb7
+  i32 1, label %bb7
+  i32 0, label %bb10
+  ]
+
+bb4:
+  %phi = phi i64 [ %zext, %bb2 ]
+  %phi5 = phi i64 [ %zext1, %bb2 ]
+  %getelementptr = getelementptr i32, ptr null, i64 %phi
+  %getelementptr6 = getelementptr i32, ptr null, i64 %phi5
+  ret void
+
+bb7:
+  %phi8 = phi i64 [ %zext, %bb3 ], [ %zext, %bb3 ]
+  br label %bb9
+
+bb9:
+  ret void
+
+bb10:
+  ret void
+}


        


More information about the llvm-commits mailing list