[llvm] [IRBuilder][ConstantFold] Fold constant for CreateInsertVector. (PR #116229)

Thu Nov 14 05:59:26 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Han-Kuan Chen (HanKuanChen)

<details>
<summary>Changes</summary>



---

Patch is 27.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116229.diff


12 Files Affected:

- (modified) llvm/include/llvm/Analysis/ConstantFolding.h (+3) 
- (modified) llvm/include/llvm/Analysis/InstSimplifyFolder.h (+5) 
- (modified) llvm/include/llvm/Analysis/TargetFolder.h (+10) 
- (modified) llvm/include/llvm/IR/ConstantFolder.h (+6) 
- (modified) llvm/include/llvm/IR/IRBuilder.h (+4-2) 
- (modified) llvm/include/llvm/IR/IRBuilderFolder.h (+3) 
- (modified) llvm/include/llvm/IR/NoFolder.h (+5) 
- (modified) llvm/lib/Analysis/ConstantFolding.cpp (+58) 
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+3-2) 
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll (+6-18) 
- (modified) llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll (+1-10) 
- (modified) llvm/test/Transforms/SLPVectorizer/revec.ll (+17-48) 


``````````diff

diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
index 58b38fb8b03674..8c092f06e9faa5 100644
--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -205,6 +205,9 @@ Constant *ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                       Constant *RHS, Type *Ty,
                                       Instruction *FMFSource);
 
+Constant *ConstantFoldInsertVectorIntrinsic(Type *DstType, Constant *SrcVec,
+                                            Constant *SubVec, Constant *Idx);
+
 /// ConstantFoldLoadThroughBitcast - try to cast constant to destination type
 /// returning null if unsuccessful. Can cast pointer to pointer or pointer to
 /// integer and vice versa if their sizes are equal.
diff --git a/llvm/include/llvm/Analysis/InstSimplifyFolder.h b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
index 430c3edc2f0dc7..2d656495e7c3b1 100644
--- a/llvm/include/llvm/Analysis/InstSimplifyFolder.h
+++ b/llvm/include/llvm/Analysis/InstSimplifyFolder.h
@@ -112,6 +112,11 @@ class InstSimplifyFolder final : public IRBuilderFolder {
     return simplifyShuffleVectorInst(V1, V2, Mask, RetTy, SQ);
   }
 
+  Value *FoldInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+                          Value *Idx) const override {
+    return nullptr;
+  }
+
   Value *FoldCast(Instruction::CastOps Op, Value *V,
                   Type *DestTy) const override {
     return simplifyCastInst(Op, V, DestTy, SQ);
diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h
index 4c78211b5c935c..22d885bf111743 100644
--- a/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/llvm/include/llvm/Analysis/TargetFolder.h
@@ -181,6 +181,16 @@ class TargetFolder final : public IRBuilderFolder {
     return nullptr;
   }
 
+  Value *FoldInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+                          Value *Idx) const override {
+    auto *CSrcVec = dyn_cast<Constant>(SrcVec);
+    auto *CSubVec = dyn_cast<Constant>(SubVec);
+    auto *CIdx = dyn_cast<Constant>(Idx);
+    if (CSrcVec && CSubVec && CIdx)
+      return ConstantFoldInsertVectorIntrinsic(DstType, CSrcVec, CSubVec, CIdx);
+    return nullptr;
+  }
+
   Value *FoldCast(Instruction::CastOps Op, Value *V,
                   Type *DestTy) const override {
     if (auto *C = dyn_cast<Constant>(V))
diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h
index a75cdf97f6ed34..cd83de45ec2260 100644
--- a/llvm/include/llvm/IR/ConstantFolder.h
+++ b/llvm/include/llvm/IR/ConstantFolder.h
@@ -170,6 +170,12 @@ class ConstantFolder final : public IRBuilderFolder {
     return nullptr;
   }
 
+  Value *FoldInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+                          Value *Idx) const override {
+    // Use TargetFolder or InstSimplifyFolder instead.
+    return nullptr;
+  }
+
   Value *FoldCast(Instruction::CastOps Op, Value *V,
                   Type *DestTy) const override {
     if (auto *C = dyn_cast<Constant>(V)) {
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 23fd8350a29b3d..ac040ab5953bfc 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1057,8 +1057,10 @@ class IRBuilderBase {
   }
 
   /// Create a call to the vector.insert intrinsic.
-  CallInst *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
-                               Value *Idx, const Twine &Name = "") {
+  Value *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+                            Value *Idx, const Twine &Name = "") {
+    if (Value *V = Folder.FoldInsertVector(DstType, SrcVec, SubVec, Idx))
+      return V;
     return CreateIntrinsic(Intrinsic::vector_insert,
                            {DstType, SubVec->getType()}, {SrcVec, SubVec, Idx},
                            nullptr, Name);
diff --git a/llvm/include/llvm/IR/IRBuilderFolder.h b/llvm/include/llvm/IR/IRBuilderFolder.h
index 921001c8a5d512..239feddef9c80b 100644
--- a/llvm/include/llvm/IR/IRBuilderFolder.h
+++ b/llvm/include/llvm/IR/IRBuilderFolder.h
@@ -71,6 +71,9 @@ class IRBuilderFolder {
   virtual Value *FoldShuffleVector(Value *V1, Value *V2,
                                    ArrayRef<int> Mask) const = 0;
 
+  virtual Value *FoldInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+                                  Value *Idx) const = 0;
+
   virtual Value *FoldCast(Instruction::CastOps Op, Value *V,
                           Type *DestTy) const = 0;
 
diff --git a/llvm/include/llvm/IR/NoFolder.h b/llvm/include/llvm/IR/NoFolder.h
index c4631a9ba1cbfb..be77521235d029 100644
--- a/llvm/include/llvm/IR/NoFolder.h
+++ b/llvm/include/llvm/IR/NoFolder.h
@@ -107,6 +107,11 @@ class NoFolder final : public IRBuilderFolder {
     return nullptr;
   }
 
+  Value *FoldInsertVector(Type *DstType, Value *SrcVec, Value *SubVec,
+                          Value *Idx) const override {
+    return nullptr;
+  }
+
   Value *FoldCast(Instruction::CastOps Op, Value *V,
                   Type *DestTy) const override {
     return nullptr;
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 1971c28fc4c4de..91a077a570f52b 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -3090,6 +3090,56 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
   return nullptr;
 }
 
+static Constant *ConstantFoldIntrinsicCall3(Intrinsic::ID IntrinsicID, Type *Ty,
+                                            ArrayRef<Constant *> Operands) {
+  assert(Operands.size() == 3 && "Wrong number of operands.");
+  switch (IntrinsicID) {
+  case Intrinsic::vector_insert: {
+    Constant *SrcVec = Operands[0];
+    Constant *SubVec = Operands[1];
+    Constant *Idx = Operands[2];
+    assert(SrcVec->getType()->isVectorTy() && "Destination is not a vector.");
+    assert(SubVec->getType()->isVectorTy() && "Source is not a vector.");
+    // The actual length is unknown.
+    if (isa<ScalableVectorType>(SrcVec->getType()))
+      return nullptr;
+    assert(
+        isa<FixedVectorType>(SubVec->getType()) &&
+        "Scalable vectors can only be inserted into other scalable vectors.");
+    assert(SrcVec->getType()->getScalarType() ==
+               SubVec->getType()->getScalarType() &&
+           "The element type of source does not match the element type of "
+           "destination.");
+    assert(Idx->getType()->isIntegerTy(64) && "Index must be i64.");
+    if (isa<UndefValue>(Idx))
+      return PoisonValue::get(Ty);
+    uint64_t IdxValue = cast<ConstantInt>(Idx)->getValue().getZExtValue();
+    unsigned SubVecNumElements =
+        cast<FixedVectorType>(SubVec->getType())->getNumElements();
+    assert(IdxValue % SubVecNumElements == 0 &&
+           "Index should be a multiple of the length of source.");
+    unsigned SrcVecNumElements =
+        cast<FixedVectorType>(SrcVec->getType())->getNumElements();
+    if (SrcVecNumElements <= IdxValue)
+      return PoisonValue::get(Ty);
+    SmallVector<Constant *, 16> Result;
+    Result.reserve(SrcVecNumElements);
+    auto *Int32Ty = Type::getInt32Ty(SrcVec->getContext());
+    for (unsigned I = 0; I != SrcVecNumElements; ++I) {
+      if (IdxValue <= I && I < IdxValue + SubVecNumElements) {
+        Result.push_back(ConstantExpr::getExtractElement(
+            SubVec, ConstantInt::get(Int32Ty, I - IdxValue)));
+        continue;
+      }
+      Result.push_back(ConstantExpr::getExtractElement(
+          SrcVec, ConstantInt::get(Int32Ty, I)));
+    }
+    return ConstantVector::get(Result);
+  }
+  }
+  return nullptr;
+}
+
 static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
                                                const APFloat &S0,
                                                const APFloat &S1,
@@ -3552,6 +3602,14 @@ Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                     dyn_cast_if_present<CallBase>(FMFSource));
 }
 
+Constant *llvm::ConstantFoldInsertVectorIntrinsic(Type *DstType,
+                                                  Constant *SrcVec,
+                                                  Constant *SubVec,
+                                                  Constant *Idx) {
+  return ConstantFoldIntrinsicCall3(Intrinsic::vector_insert, DstType,
+                                    {SrcVec, SubVec, Idx});
+}
+
 Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                  ArrayRef<Constant *> Operands,
                                  const TargetLibraryInfo *TLI,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 506c243688d9c8..7a599f609cf67e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13511,12 +13511,13 @@ Value *BoUpSLP::gather(
     Instruction *InsElt;
     if (auto *VecTy = dyn_cast<FixedVectorType>(Scalar->getType())) {
       assert(SLPReVec && "FixedVectorType is not expected.");
-      Vec = InsElt = Builder.CreateInsertVector(
+      Vec = Builder.CreateInsertVector(
           Vec->getType(), Vec, Scalar,
           Builder.getInt64(Pos * VecTy->getNumElements()));
-      auto *II = dyn_cast<IntrinsicInst>(InsElt);
+      auto *II = dyn_cast<IntrinsicInst>(Vec);
       if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
         return Vec;
+      InsElt = II;
     } else {
       Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos));
       InsElt = dyn_cast<InsertElementInst>(Vec);
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
index b312688b7932dc..56302460c9875e 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll
@@ -7,8 +7,7 @@ define i32 @test() {
 ; CHECK-NEXT:    br label [[IF_END_I87:%.*]]
 ; CHECK:       if.end.i87:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> <ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64064 to ptr), ptr inttoptr (i64 64064 to ptr)>, <4 x i64> <i64 0, i64 1, i64 0, i64 1>), i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2)
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    switch i32 0, label [[SW_BB509_I:%.*]] [
 ; CHECK-NEXT:      i32 1, label [[SW_BB509_I]]
 ; CHECK-NEXT:      i32 0, label [[IF_THEN458_I:%.*]]
@@ -54,17 +53,9 @@ define void @test2() {
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8)
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16)
 ; CHECK-NEXT:    [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double>
-; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8)
-; CHECK-NEXT:    [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16)
-; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24)
-; CHECK-NEXT:    [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]]
+; CHECK-NEXT:    [[TMP14:%.*]] = fadd <32 x double> zeroinitializer, [[TMP9]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float>
-; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8)
-; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16)
-; CHECK-NEXT:    [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24)
-; CHECK-NEXT:    [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fcmp ogt <32 x float> zeroinitializer, [[TMP15]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -100,20 +91,17 @@ define void @test3(float %0) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY_LR_PH:%.*]]
 ; CHECK:       for.body.lr.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2)
 ; CHECK-NEXT:    br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <4 x float> [ zeroinitializer, [[FOR_BODY_LR_PH]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
+; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> <i1 true, i1 true, i1 poison, i1 poison>, <2 x i1> [[TMP5]], i64 2)
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]]
+; CHECK-NEXT:    [[TMP7]] = select <4 x i1> [[TMP6]], <4 x float> [[TMP9]], <4 x float> zeroinitializer
 ; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
index 4b62ef688ca44f..4c295355617e4d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll
@@ -4,16 +4,7 @@
 define i16 @test() {
 ; CHECK-LABEL: define i16 @test() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> <i1 false, i1 false, i1 poison, i1 poison>, <2 x i1> zeroinitializer, i64 2)
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i1> zeroinitializer, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i1> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = or <4 x i1> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> zeroinitializer, <4 x i1> [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = sext <4 x i1> [[TMP7]] to <4 x i16>
-; CHECK-NEXT:    [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]])
+; CHECK-NEXT:    [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> zeroinitializer)
 ; CHECK-NEXT:    ret i16 [[TMP9]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll
index aec81086105d68..3703d2c9b603e9 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec.ll
@@ -95,13 +95,10 @@ define void @test4(ptr %in, ptr %out) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[IN:%.*]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[TMP0]], i64 0)
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP4]], <8 x float> zeroinitializer, i64 8)
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 8)
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP6]]
-; CHECK-NEXT:    [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], [[TMP5]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i1, ptr [[OUT:%.*]], i64 8
 ; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 8)
 ; CHECK-NEXT:    store <8 x i1> [[TMP13]], ptr [[OUT]], align 1
@@ -153,20 +150,15 @@ define <4 x i1> @test6(ptr %in1, ptr %in2) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[IN2:%.*]], align 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> [[TMP0]], i64 0)
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP7]], <4 x i32> zeroinitializer, i64 4)
-; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP8]], <4 x i32> zeroinitializer, i64 8)
-; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP9]], <4 x i32> zeroinitializer, i64 12)
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> [[TMP1]], i64 0)
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i16> [[TMP15]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP17:%.*]] = call <16 x i16> ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/116229