[llvm] fb86b3d - [SLP]Change the insertion point for outside-block-used nodes and prevec phi operand gathers

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Sun May 18 13:26:37 PDT 2025


Author: Alexey Bataev
Date: 2025-05-18T12:59:36-07:00
New Revision: fb86b3d96b73f4e628288b180ef4e038da8b7bc1

URL: https://github.com/llvm/llvm-project/commit/fb86b3d96b73f4e628288b180ef4e038da8b7bc1
DIFF: https://github.com/llvm/llvm-project/commit/fb86b3d96b73f4e628288b180ef4e038da8b7bc1.diff

LOG: [SLP]Change the insertion point for outside-block-used nodes and prevec phi operand gathers

The insertion point for a (non-schedulable) vector node needs to be set after
the last instruction in the node to avoid def-use breakage. However, this also
causes a miscompilation with gather/buildvector operands of phi nodes that are
used only in the same phi within the block.
These nodes are supposed to be inserted at the end of the block, and after
changing the insertion point for the non-schedulable vector block, this may
also break def-use dependencies. Such nodes need to be prevectorized, i.e.
emitted as early as possible, so that the vectorized nodes are inserted before
these nodes.

Fixes #139728

Recommit after revert 60fb92179291e848eb7b04913bdc818d081db296

Reviewers: hiraditya, HanKuanChen, RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/139917

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
    llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
    llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
    llvm/test/Transforms/SLPVectorizer/X86/user-node-with-same-last-instr.ll
    llvm/test/Transforms/SLPVectorizer/revec.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index eb339282fdae8..448556330d65d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -16142,16 +16142,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
                 [](Value *V) {
                   return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
                 })) ||
-        all_of(E->Scalars,
-               [](Value *V) {
-                 return isa<PoisonValue>(V) ||
-                        (!isVectorLikeInstWithConstOps(V) &&
-                         isUsedOutsideBlock(V));
-               }) ||
-        (E->isGather() && E->Idx == 0 && all_of(E->Scalars, [](Value *V) {
-           return isa<ExtractElementInst, UndefValue>(V) ||
-                  areAllOperandsNonInsts(V);
-         })))
+        all_of(E->Scalars, [](Value *V) {
+          return isa<PoisonValue>(V) ||
+                 (!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
+        }))
       Res = FindLastInst();
     else
       Res = FindFirstInst();
@@ -17617,6 +17611,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     if (VecTy)
       ScalarTy = getWidenedType(ScalarTy, VecTy->getNumElements());
   }
+  if (E->VectorizedValue)
+    return E->VectorizedValue;
   auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size());
   if (E->isGather()) {
     // Set insert point for non-reduction initial nodes.
@@ -17799,6 +17795,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
           Value *VecOp = NewPhi->getIncomingValueForBlock(IBB);
           NewPhi->addIncoming(VecOp, IBB);
           TreeEntry *OpTE = getOperandEntry(E, I);
+          assert(!OpTE->VectorizedValue && "Expected no vectorized value.");
           OpTE->VectorizedValue = VecOp;
           continue;
         }
@@ -18696,6 +18693,22 @@ Value *BoUpSLP::vectorizeTree(
   else
     Builder.SetInsertPoint(&F->getEntryBlock(), F->getEntryBlock().begin());
 
+ // Vectorize gather operands of the nodes with the external uses only.
+  for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
+    if (TE->isGather() && !TE->VectorizedValue && TE->UserTreeIndex.UserTE &&
+        TE->UserTreeIndex.UserTE->hasState() &&
+        TE->UserTreeIndex.UserTE->State == TreeEntry::Vectorize &&
+        (TE->UserTreeIndex.UserTE->getOpcode() != Instruction::PHI ||
+         TE->UserTreeIndex.UserTE->isAltShuffle()) &&
+        all_of(TE->UserTreeIndex.UserTE->Scalars,
+               [](Value *V) { return isUsedOutsideBlock(V); })) {
+      Instruction &LastInst =
+          getLastInstructionInBundle(TE->UserTreeIndex.UserTE);
+      Builder.SetInsertPoint(&LastInst);
+      Builder.SetCurrentDebugLocation(LastInst.getDebugLoc());
+      (void)vectorizeTree(TE.get());
+    }
+  }
   // Emit gathered loads first to emit better code for the users of those
   // gathered loads.
   for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
index 55e691b39d78c..29589f369c54c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -10,37 +10,38 @@ target triple = "x86_64-apple-macosx10.8.0"
 define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %vertices, i1 %arg) #0 align 2 {
 ; CHECK-LABEL: @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 %arg, label [[RETURN:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br i1 [[ARG:%.*]], label [[RETURN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    br i1 %arg, label [[IF_THEN17_1:%.*]], label [[IF_END22_1:%.*]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[IF_THEN17_1:%.*]], label [[IF_END22_1:%.*]]
 ; CHECK:       for.end36:
 ; CHECK-NEXT:    br label [[FOR_BODY144:%.*]]
 ; CHECK:       for.body144:
-; CHECK-NEXT:    br i1 %arg, label [[FOR_END227:%.*]], label [[FOR_BODY144]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[FOR_END227:%.*]], label [[FOR_BODY144]]
 ; CHECK:       for.end227:
-; CHECK-NEXT:    br i1 %arg, label [[FOR_END271:%.*]], label [[FOR_BODY233:%.*]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[FOR_END271:%.*]], label [[FOR_BODY233:%.*]]
 ; CHECK:       for.body233:
-; CHECK-NEXT:    br i1 %arg, label [[FOR_BODY233]], label [[FOR_END271]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[FOR_BODY233]], label [[FOR_END271]]
 ; CHECK:       for.end271:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ splat (float 0x47EFFFFFE0000000), [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x float> undef, [[TMP0]]
-; CHECK-NEXT:    br i1 %arg, label [[IF_THEN291:%.*]], label [[RETURN]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[IF_THEN291:%.*]], label [[RETURN]]
 ; CHECK:       if.then291:
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 5.000000e-01)
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[TMP2]]
-; CHECK-NEXT:    br i1 %arg, label [[IF_END332:%.*]], label [[IF_ELSE319:%.*]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[IF_END332:%.*]], label [[IF_ELSE319:%.*]]
 ; CHECK:       if.else319:
-; CHECK-NEXT:    br i1 %arg, label [[IF_THEN325:%.*]], label [[IF_END327:%.*]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[IF_THEN325:%.*]], label [[IF_END327:%.*]]
 ; CHECK:       if.then325:
 ; CHECK-NEXT:    br label [[IF_END327]]
 ; CHECK:       if.end327:
-; CHECK-NEXT:    br i1 %arg, label [[IF_THEN329:%.*]], label [[IF_END332]]
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> <float poison, float undef>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    br i1 [[ARG]], label [[IF_THEN329:%.*]], label [[IF_END332]]
 ; CHECK:       if.then329:
 ; CHECK-NEXT:    br label [[IF_END332]]
 ; CHECK:       if.end332:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ splat (float 0x3F847AE140000000), [[IF_THEN291]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x float> [ [[TMP6]], [[IF_THEN329]] ], [ [[TMP6]], [[IF_END327]] ], [ splat (float 0x3F847AE140000000), [[IF_THEN291]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[VERTICES:%.*]], align 4
 ; CHECK-NEXT:    br label [[RETURN]]
@@ -49,11 +50,11 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %ve
 ; CHECK:       if.then17.1:
 ; CHECK-NEXT:    br label [[IF_END22_1]]
 ; CHECK:       if.end22.1:
-; CHECK-NEXT:    br i1 %arg, label [[IF_THEN17_2:%.*]], label [[IF_END22_2:%.*]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[IF_THEN17_2:%.*]], label [[IF_END22_2:%.*]]
 ; CHECK:       if.then17.2:
 ; CHECK-NEXT:    br label [[IF_END22_2]]
 ; CHECK:       if.end22.2:
-; CHECK-NEXT:    br i1 %arg, label [[FOR_END36:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[ARG]], label [[FOR_END36:%.*]], label [[FOR_BODY]]
 ;
 entry:
   br i1 %arg, label %return, label %if.end

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
index 5b936f65a3221..6fa33671a7b53 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-bv-schedulable.ll
@@ -7,11 +7,11 @@ define void @test() {
 ; CHECK-NEXT:    br i1 false, label %[[BB1:.*]], label %[[BB5:.*]]
 ; CHECK:       [[BB1]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], %[[BB1]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP0]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 0, i32 0
 ; CHECK-NEXT:    [[TMP3]] = or <2 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP0]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
 ; CHECK-NEXT:    [[OR3:%.*]] = or i32 [[TMP6]], 0
 ; CHECK-NEXT:    br i1 false, label %[[BB1]], label %[[BB5]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll b/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
new file mode 100644
index 0000000000000..1c482e079bb0f
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define i64 @test() {
+; CHECK-LABEL: define i64 @test() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 0, i32 1
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB5:.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP4]] = or <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    br label %[[BB5]]
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    br i1 false, label %[[BB6:.*]], label %[[BB1]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x i32> [ [[TMP2]], %[[BB5]] ]
+; CHECK-NEXT:    ret i64 0
+;
+bb:
+  br label %bb1
+
+bb1:
+  %phi = phi i32 [ 0, %bb ], [ %or, %bb5 ]
+  %phi2 = phi i32 [ 0, %bb ], [ %or4, %bb5 ]
+  %or = or i32 %phi, 0
+  %add = add i32 0, 0
+  %or3 = or i32 %add, %phi2
+  %or4 = or i32 %or3, 0
+  br label %bb5
+
+bb5:
+  br i1 false, label %bb6, label %bb1
+
+bb6:
+  %phi7 = phi i32 [ %or, %bb5 ]
+  %phi8 = phi i32 [ %or3, %bb5 ]
+  ret i64 0
+}

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
index 6e770bdf6eb0c..af165de293005 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
@@ -5,8 +5,8 @@ define void @test(float %0) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 0>
+; CHECK-NEXT:    [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = fdiv <2 x float> [[TMP6]], zeroinitializer
 ; CHECK-NEXT:    br label %[[BB6:.*]]
 ; CHECK:       [[BB6]]:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/user-node-with-same-last-instr.ll b/llvm/test/Transforms/SLPVectorizer/X86/user-node-with-same-last-instr.ll
index c66071015bbcb..c3135f19a326d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/user-node-with-same-last-instr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/user-node-with-same-last-instr.ll
@@ -11,10 +11,10 @@ define void @wombat(i32 %arg) {
 ; CHECK:       [[BB2]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP4:%.*]], %[[BB4:.*]] ], [ zeroinitializer, %[[BB1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 [[ARG]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP4]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub <2 x i32> [[TMP0]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = and <2 x i32> [[TMP0]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP7]], <2 x i32> <i32 0, i32 3>

diff  --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index 36dbeed9bbcd5..afe92f89ac0d1 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -415,6 +415,10 @@ define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> [[TMP0:%.*]], i64 0)
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP9:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP9]], <8 x i32> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP10]], <8 x i32> zeroinitializer, i64 16)
+; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP5]], <8 x i32> zeroinitializer, i64 24)
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT:%.*]]
 ; CHECK:       for.end.loopexit:
@@ -454,6 +458,10 @@ define void @test14(<8 x i1> %0) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i1> [[TMP1]], <16 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP3:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP9:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> poison, <8 x i16> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP9]], <8 x i16> zeroinitializer, i64 8)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP10]], <8 x i16> zeroinitializer, i64 16)
+; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP7]], <8 x i16> zeroinitializer, i64 24)
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT:%.*]]
 ; CHECK:       for.end.loopexit:


        


More information about the llvm-commits mailing list