[llvm] [VectorCombine] Remove dead node immediately in VectorCombine (PR #149047)

David Green via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 17 13:29:49 PDT 2025


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/149047

>From 4c8a957c7ed4f415ee0b0da3315749ecc1639ccc Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 17 Aug 2025 21:04:35 +0100
Subject: [PATCH] [VectorCombine] Remove dead node immediately in VectorCombine

The vector combiner processes every instruction as it first loops through the
function, deferring newly created and newly dead instructions to a worklist
that is only processed once all nodes are done. This leaves extra uses in the
graph while the initial pass runs, leading to sub-optimal decisions being made
for other combines. This patch changes the pass to remove trivially dead
instructions immediately. The main change this requires is making sure that
iterator invalidation does not occur.
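
In outline, the key behavioral change is in replaceValue (a condensed sketch
distilled from the hunks below, with debug output dropped; the Erase parameter
and all names match the patch):

    void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
      Old.replaceAllUsesWith(&New);
      if (auto *NewI = dyn_cast<Instruction>(&New)) {
        Worklist.pushUsersToWorkList(*NewI);
        Worklist.pushValue(NewI);
      }
      if (Erase && isInstructionTriviallyDead(&Old))
        eraseInstruction(Old); // drop the dead node now, not at end of run
      else
        Worklist.push(&Old);
    }

Callers that still need the old instruction afterwards (scalarizeLoadExtract,
foldSelectShuffle, shrinkLoadForShuffles) pass Erase = false.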
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 212 +++++++++++-------
 .../PhaseOrdering/AArch64/interleave_vec.ll   |  42 +---
 .../test/Transforms/PhaseOrdering/X86/hadd.ll |  10 +-
 .../test/Transforms/PhaseOrdering/X86/hsub.ll |  10 +-
 .../X86/extract-binop-inseltpoison.ll         |  12 +-
 .../VectorCombine/X86/extract-binop.ll        |  18 +-
 .../VectorCombine/X86/extract-cmp.ll          |   2 +-
 .../X86/load-extractelement-scalarization.ll  |   2 +
 .../VectorCombine/X86/load-inseltpoison.ll    |   6 +-
 9 files changed, 160 insertions(+), 154 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d5f1018a2c052..79c544d1e692d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -112,10 +112,8 @@ class VectorCombine {
                              const Instruction &I,
                              ExtractElementInst *&ConvertToShuffle,
                              unsigned PreferredExtractIndex);
-  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-                     Instruction &I);
-  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-                       Instruction &I);
+  Value *foldExtExtCmp(Value *V0, Value *V1, Value *ExtIndex, Instruction &I);
+  Value *foldExtExtBinop(Value *V0, Value *V1, Value *ExtIndex, Instruction &I);
   bool foldExtractExtract(Instruction &I);
   bool foldInsExtFNeg(Instruction &I);
   bool foldInsExtBinop(Instruction &I);
@@ -145,7 +143,7 @@ class VectorCombine {
   bool shrinkLoadForShuffles(Instruction &I);
   bool shrinkPhiOfShuffles(Instruction &I);
 
-  void replaceValue(Value &Old, Value &New) {
+  void replaceValue(Instruction &Old, Value &New, bool Erase = true) {
     LLVM_DEBUG(dbgs() << "VC: Replacing: " << Old << '\n');
     LLVM_DEBUG(dbgs() << "         With: " << New << '\n');
     Old.replaceAllUsesWith(&New);
@@ -154,7 +152,11 @@ class VectorCombine {
       Worklist.pushUsersToWorkList(*NewI);
       Worklist.pushValue(NewI);
     }
-    Worklist.pushValue(&Old);
+    if (Erase && isInstructionTriviallyDead(&Old)) {
+      eraseInstruction(Old);
+    } else {
+      Worklist.push(&Old);
+    }
   }
 
   void eraseInstruction(Instruction &I) {
@@ -165,11 +167,23 @@ class VectorCombine {
 
     // Push remaining users of the operands and then the operand itself - allows
     // further folds that were hindered by OneUse limits.
-    for (Value *Op : Ops)
-      if (auto *OpI = dyn_cast<Instruction>(Op)) {
-        Worklist.pushUsersToWorkList(*OpI);
-        Worklist.pushValue(OpI);
+    SmallPtrSet<Value *, 4> Visited;
+    for (Value *Op : Ops) {
+      if (Visited.insert(Op).second) {
+        if (auto *OpI = dyn_cast<Instruction>(Op)) {
+          if (RecursivelyDeleteTriviallyDeadInstructions(
+                  OpI, nullptr, nullptr, [this](Value *V) {
+                    if (auto I = dyn_cast<Instruction>(V)) {
+                      LLVM_DEBUG(dbgs() << "VC: Erased: " << *I << '\n');
+                      Worklist.remove(I);
+                    }
+                  }))
+            continue;
+          Worklist.pushUsersToWorkList(*OpI);
+          Worklist.pushValue(OpI);
+        }
       }
+    }
   }
 };
 } // namespace
@@ -553,9 +567,8 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
 /// the source vector (shift the scalar element) to a NewIndex for extraction.
 /// Return null if the input can be constant folded, so that we are not creating
 /// unnecessary instructions.
-static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
-                                            unsigned NewIndex,
-                                            IRBuilderBase &Builder) {
+static Value *translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex,
+                               IRBuilderBase &Builder) {
   // Shufflevectors can only be created for fixed-width vectors.
   Value *X = ExtElt->getVectorOperand();
   if (!isa<FixedVectorType>(X->getType()))
@@ -570,52 +583,43 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
 
   Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
                                    NewIndex, Builder);
-  return dyn_cast<ExtractElementInst>(
-      Builder.CreateExtractElement(Shuf, NewIndex));
+  return Shuf;
 }
 
 /// Try to reduce extract element costs by converting scalar compares to vector
 /// compares followed by extract.
-/// cmp (ext0 V0, C), (ext1 V1, C)
-void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
-                                  ExtractElementInst *Ext1, Instruction &I) {
+/// cmp (ext0 V0, ExtIndex), (ext1 V1, ExtIndex)
+Value *VectorCombine::foldExtExtCmp(Value *V0, Value *V1, Value *ExtIndex,
+                                    Instruction &I) {
   assert(isa<CmpInst>(&I) && "Expected a compare");
-  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
-             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
-         "Expected matching constant extract indexes");
 
-  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
+  // cmp Pred (extelt V0, ExtIndex), (extelt V1, ExtIndex)
+  //   --> extelt (cmp Pred V0, V1), ExtIndex
   ++NumVecCmp;
   CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
-  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
-  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
-  replaceValue(I, *NewExt);
+  return Builder.CreateExtractElement(VecCmp, ExtIndex, "foldExtExtCmp");
 }
 
 /// Try to reduce extract element costs by converting scalar binops to vector
 /// binops followed by extract.
-/// bo (ext0 V0, C), (ext1 V1, C)
-void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
-                                    ExtractElementInst *Ext1, Instruction &I) {
+/// bo (ext0 V0, ExtIndex), (ext1 V1, ExtIndex)
+Value *VectorCombine::foldExtExtBinop(Value *V0, Value *V1, Value *ExtIndex,
+                                      Instruction &I) {
   assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
-  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
-             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
-         "Expected matching constant extract indexes");
 
-  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
+  // bo (extelt V0, ExtIndex), (extelt V1, ExtIndex)
+  //   --> extelt (bo V0, V1), ExtIndex
   ++NumVecBO;
-  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
-  Value *VecBO =
-      Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
+  Value *VecBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0,
+                                     V1, "foldExtExtBinop");
 
   // All IR flags are safe to back-propagate because any potential poison
   // created in unused vector elements is discarded by the extract.
   if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
     VecBOInst->copyIRFlags(&I);
 
-  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
-  replaceValue(I, *NewExt);
+  return Builder.CreateExtractElement(VecBO, ExtIndex, "foldExtExtBinop");
 }
 
 /// Match an instruction with extracted vector operands.
@@ -654,25 +658,29 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
   if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
     return false;
 
+  Value *ExtOp0 = Ext0->getVectorOperand();
+  Value *ExtOp1 = Ext1->getVectorOperand();
+
   if (ExtractToChange) {
     unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
-    ExtractElementInst *NewExtract =
+    Value *NewExtOp =
         translateExtract(ExtractToChange, CheapExtractIdx, Builder);
-    if (!NewExtract)
+    if (!NewExtOp)
       return false;
     if (ExtractToChange == Ext0)
-      Ext0 = NewExtract;
+      ExtOp0 = NewExtOp;
     else
-      Ext1 = NewExtract;
+      ExtOp1 = NewExtOp;
   }
 
-  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
-    foldExtExtCmp(Ext0, Ext1, I);
-  else
-    foldExtExtBinop(Ext0, Ext1, I);
-
+  Value *ExtIndex = ExtractToChange == Ext0 ? Ext1->getIndexOperand()
+                                            : Ext0->getIndexOperand();
+  Value *NewExt = Pred != CmpInst::BAD_ICMP_PREDICATE
+                      ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex, I)
+                      : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex, I);
   Worklist.push(Ext0);
   Worklist.push(Ext1);
+  replaceValue(I, *NewExt);
   return true;
 }
 
@@ -1825,7 +1833,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
         LI->getAlign(), VecTy->getElementType(), Idx, *DL);
     NewLoad->setAlignment(ScalarOpAlignment);
 
-    replaceValue(*EI, *NewLoad);
+    replaceValue(*EI, *NewLoad, false);
   }
 
   FailureGuard.release();
@@ -3113,7 +3121,7 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
         Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
     LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
     replaceValue(*Shuffle, *NewShuffle);
-    MadeChanges = true;
+    return true;
   }
 
   // See if we can re-use foldSelectShuffle, getting it to reduce the size of
@@ -3609,7 +3617,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
   for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
     Builder.SetInsertPoint(Shuffles[S]);
     Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
-    replaceValue(*Shuffles[S], *NSV);
+    replaceValue(*Shuffles[S], *NSV, false);
   }
 
   Worklist.pushValue(NSV0A);
@@ -3980,7 +3988,7 @@ bool VectorCombine::shrinkLoadForShuffles(Instruction &I) {
         Value *NewShuffle = Builder.CreateShuffleVector(
             NewLoad, PoisonValue::get(NewLoadTy), NewMask);
 
-        replaceValue(*Shuffle, *NewShuffle);
+        replaceValue(*Shuffle, *NewShuffle, false);
       }
 
       return true;
@@ -4096,8 +4104,7 @@ bool VectorCombine::run() {
 
   LLVM_DEBUG(dbgs() << "\n\nVECTORCOMBINE on " << F.getName() << "\n");
 
-  bool MadeChange = false;
-  auto FoldInst = [this, &MadeChange](Instruction &I) {
+  auto FoldInst = [this](Instruction &I) {
     Builder.SetInsertPoint(&I);
     bool IsVectorType = isa<VectorType>(I.getType());
     bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
@@ -4112,10 +4119,12 @@ bool VectorCombine::run() {
     if (IsFixedVectorType) {
       switch (Opcode) {
       case Instruction::InsertElement:
-        MadeChange |= vectorizeLoadInsert(I);
+        if (vectorizeLoadInsert(I))
+          return true;
         break;
       case Instruction::ShuffleVector:
-        MadeChange |= widenSubvectorLoad(I);
+        if (widenSubvectorLoad(I))
+          return true;
         break;
       default:
         break;
@@ -4125,19 +4134,25 @@ bool VectorCombine::run() {
     // This transform works with scalable and fixed vectors
     // TODO: Identify and allow other scalable transforms
     if (IsVectorType) {
-      MadeChange |= scalarizeOpOrCmp(I);
-      MadeChange |= scalarizeLoadExtract(I);
-      MadeChange |= scalarizeExtExtract(I);
-      MadeChange |= scalarizeVPIntrinsic(I);
-      MadeChange |= foldInterleaveIntrinsics(I);
+      if (scalarizeOpOrCmp(I))
+        return true;
+      if (scalarizeLoadExtract(I))
+        return true;
+      if (scalarizeExtExtract(I))
+        return true;
+      if (scalarizeVPIntrinsic(I))
+        return true;
+      if (foldInterleaveIntrinsics(I))
+        return true;
     }
 
     if (Opcode == Instruction::Store)
-      MadeChange |= foldSingleElementStore(I);
+      if (foldSingleElementStore(I))
+        return true;
 
     // If this is an early pipeline invocation of this pass, we are done.
     if (TryEarlyFoldsOnly)
-      return;
+      return false;
 
     // Otherwise, try folds that improve codegen but may interfere with
     // early IR canonicalizations.
@@ -4146,62 +4161,87 @@ bool VectorCombine::run() {
     if (IsFixedVectorType) {
       switch (Opcode) {
       case Instruction::InsertElement:
-        MadeChange |= foldInsExtFNeg(I);
-        MadeChange |= foldInsExtBinop(I);
-        MadeChange |= foldInsExtVectorToShuffle(I);
+        if (foldInsExtFNeg(I))
+          return true;
+        if (foldInsExtBinop(I))
+          return true;
+        if (foldInsExtVectorToShuffle(I))
+          return true;
         break;
       case Instruction::ShuffleVector:
-        MadeChange |= foldPermuteOfBinops(I);
-        MadeChange |= foldShuffleOfBinops(I);
-        MadeChange |= foldShuffleOfSelects(I);
-        MadeChange |= foldShuffleOfCastops(I);
-        MadeChange |= foldShuffleOfShuffles(I);
-        MadeChange |= foldShuffleOfIntrinsics(I);
-        MadeChange |= foldSelectShuffle(I);
-        MadeChange |= foldShuffleToIdentity(I);
+        if (foldPermuteOfBinops(I))
+          return true;
+        if (foldShuffleOfBinops(I))
+          return true;
+        if (foldShuffleOfSelects(I))
+          return true;
+        if (foldShuffleOfCastops(I))
+          return true;
+        if (foldShuffleOfShuffles(I))
+          return true;
+        if (foldShuffleOfIntrinsics(I))
+          return true;
+        if (foldSelectShuffle(I))
+          return true;
+        if (foldShuffleToIdentity(I))
+          return true;
         break;
       case Instruction::Load:
-        MadeChange |= shrinkLoadForShuffles(I);
+        if (shrinkLoadForShuffles(I))
+          return true;
         break;
       case Instruction::BitCast:
-        MadeChange |= foldBitcastShuffle(I);
+        if (foldBitcastShuffle(I))
+          return true;
         break;
       case Instruction::And:
       case Instruction::Or:
       case Instruction::Xor:
-        MadeChange |= foldBitOpOfCastops(I);
+        if (foldBitOpOfCastops(I))
+          return true;
         break;
       case Instruction::PHI:
-        MadeChange |= shrinkPhiOfShuffles(I);
+        if (shrinkPhiOfShuffles(I))
+          return true;
         break;
       default:
-        MadeChange |= shrinkType(I);
+        if (shrinkType(I))
+          return true;
         break;
       }
     } else {
       switch (Opcode) {
       case Instruction::Call:
-        MadeChange |= foldShuffleFromReductions(I);
-        MadeChange |= foldCastFromReductions(I);
+        if (foldShuffleFromReductions(I))
+          return true;
+        if (foldCastFromReductions(I))
+          return true;
         break;
       case Instruction::ICmp:
       case Instruction::FCmp:
-        MadeChange |= foldExtractExtract(I);
+        if (foldExtractExtract(I))
+          return true;
         break;
       case Instruction::Or:
-        MadeChange |= foldConcatOfBoolMasks(I);
+        if (foldConcatOfBoolMasks(I))
+          return true;
         [[fallthrough]];
       default:
         if (Instruction::isBinaryOp(Opcode)) {
-          MadeChange |= foldExtractExtract(I);
-          MadeChange |= foldExtractedCmps(I);
-          MadeChange |= foldBinopOfReductions(I);
+          if (foldExtractExtract(I))
+            return true;
+          if (foldExtractedCmps(I))
+            return true;
+          if (foldBinopOfReductions(I))
+            return true;
         }
         break;
       }
     }
+    return false;
   };
 
+  bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
     if (!DT.isReachableFromEntry(&BB))
@@ -4210,7 +4250,7 @@ bool VectorCombine::run() {
     for (Instruction &I : make_early_inc_range(BB)) {
       if (I.isDebugOrPseudoInst())
         continue;
-      FoldInst(I);
+      MadeChange |= FoldInst(I);
     }
   }
 
@@ -4224,7 +4264,7 @@ bool VectorCombine::run() {
       continue;
     }
 
-    FoldInst(*I);
+    MadeChange |= FoldInst(*I);
   }
 
   return MadeChange;
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
index eec0b5c3e276f..afe7d7498fc1d 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll
@@ -926,53 +926,17 @@ define void @same_op8_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[C]], align 4
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP5]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT:    [[STRIDED_VEC12:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT:    [[STRIDED_VEC13:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT:    [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT:    [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT:    [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT:    [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT:    [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[WIDE_VEC19:%.*]] = load <16 x float>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT:    [[STRIDED_VEC21:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT:    [[STRIDED_VEC22:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT:    [[STRIDED_VEC23:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT:    [[STRIDED_VEC24:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT:    [[STRIDED_VEC25:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT:    [[STRIDED_VEC26:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT:    [[STRIDED_VEC27:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> [[STRIDED_VEC20]], <2 x float> [[STRIDED_VEC21]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[STRIDED_VEC]], <2 x float> [[STRIDED_VEC12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x float> [[TMP8]], [[TMP1]]
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[STRIDED_VEC22]], <2 x float> [[STRIDED_VEC23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[STRIDED_VEC13]], <2 x float> [[STRIDED_VEC14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP13:%.*]] = fmul fast <4 x float> [[TMP12]], [[TMP2]]
-; CHECK-NEXT:    [[TMP14:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <2 x float> [[STRIDED_VEC24]], <2 x float> [[STRIDED_VEC25]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x float> [[STRIDED_VEC15]], <2 x float> [[STRIDED_VEC16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP17:%.*]] = fmul fast <4 x float> [[TMP16]], [[TMP3]]
-; CHECK-NEXT:    [[TMP18:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <2 x float> [[STRIDED_VEC26]], <2 x float> [[STRIDED_VEC27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x float> [[STRIDED_VEC17]], <2 x float> [[STRIDED_VEC18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP21:%.*]] = fmul fast <4 x float> [[TMP20]], [[TMP4]]
-; CHECK-NEXT:    [[TMP22:%.*]] = fadd fast <4 x float> [[TMP19]], [[TMP21]]
-; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP23]], <8 x float> [[TMP24]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[TMP1]]
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC19]], [[TMP4]]
 ; CHECK-NEXT:    store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
index 798df4cd4ff54..63f8250b5f3de 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
@@ -404,13 +404,13 @@ define <16 x i16> @add_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
 ; SSE4-LABEL: @add_v16i16_FEuCBA98765432u0(
 ; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE4-NEXT:    [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 poison, i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 26, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 11, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 26, i32 29, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE4-NEXT:    [[TMP6:%.*]] = add <16 x i16> [[TMP4]], [[TMP5]]
-; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE4-NEXT:    [[TMP9:%.*]] = add <16 x i16> [[TMP7]], [[TMP8]]
-; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 3, i32 2, i32 poison, i32 1, i32 0, i32 27, i32 26, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
+; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
 ; SSE4-NEXT:    ret <16 x i16> [[RESULT]]
 ;
 ; AVX2-LABEL: @add_v16i16_FEuCBA98765432u0(
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
index fd160b7c57024..bbfe844400b0c 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
@@ -398,13 +398,13 @@ define <16 x i16> @sub_v16i16_FEuCBA98765432u0(<16 x i16> %a, <16 x i16> %b) {
 ; SSE4-LABEL: @sub_v16i16_FEuCBA98765432u0(
 ; SSE4-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 1, i32 poison, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE4-NEXT:    [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 25, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 poison, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 27, i32 29, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 0, i32 poison, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[A]], <16 x i32> <i32 0, i32 poison, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 27, i32 29, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE4-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[TMP4]], [[TMP5]]
-; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 14, i32 24, i32 poison, i32 28, i32 30, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <16 x i32> <i32 15, i32 25, i32 poison, i32 29, i32 31, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SSE4-NEXT:    [[TMP9:%.*]] = sub <16 x i16> [[TMP7]], [[TMP8]]
-; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 3, i32 2, i32 poison, i32 1, i32 0, i32 27, i32 26, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
+; SSE4-NEXT:    [[RESULT:%.*]] = shufflevector <16 x i16> [[TMP9]], <16 x i16> [[TMP6]], <16 x i32> <i32 4, i32 3, i32 poison, i32 1, i32 0, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 poison, i32 16>
 ; SSE4-NEXT:    ret <16 x i16> [[RESULT]]
 ;
 ; AVX2-LABEL: @sub_v16i16_FEuCBA98765432u0(
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
index d369279c15db4..41d77e89476ba 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll
@@ -268,7 +268,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-LABEL: @ext5_ext0_add(
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %e0 = extractelement <16 x i8> %x, i32 5
@@ -294,7 +294,7 @@ define float @ext1_ext0_fmul(<4 x float> %x) {
 ; CHECK-LABEL: @ext1_ext0_fmul(
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %e0 = extractelement <4 x float> %x, i32 1
@@ -363,7 +363,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) {
 ; AVX-LABEL: @ext7_ext4_fmul_v8f32(
 ; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison>
 ; AVX-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4
+; AVX-NEXT:    [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
 ; AVX-NEXT:    ret float [[R]]
 ;
   %e0 = extractelement <8 x float> %x, i32 7
@@ -484,7 +484,7 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]]
 ; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]]
-; CHECK-NEXT:    [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT:    [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
 ; CHECK-NEXT:    ret i32 [[Z0123]]
 ;
   %z = and <4 x i32> %x, %y
@@ -504,7 +504,7 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
 ; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
-; CHECK-NEXT:    [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT:    [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
 ; CHECK-NEXT:    ret i32 [[X210]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
@@ -523,7 +523,7 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
 ; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]]
-; CHECK-NEXT:    [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT:    [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
 ; CHECK-NEXT:    ret i32 [[X2Y210]]
 ;
   %y0 = extractelement <4 x i32> %y, i32 0
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
index d11fb1426c94e..4c1ca82b2bd06 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -268,7 +268,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
 ; CHECK-LABEL: @ext5_ext0_add(
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %e0 = extractelement <16 x i8> %x, i32 5
@@ -294,7 +294,7 @@ define float @ext1_ext0_fmul(<4 x float> %x) {
 ; CHECK-LABEL: @ext1_ext0_fmul(
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %e0 = extractelement <4 x float> %x, i32 1
@@ -363,7 +363,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) {
 ; AVX-LABEL: @ext7_ext4_fmul_v8f32(
 ; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison>
 ; AVX-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]]
-; AVX-NEXT:    [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4
+; AVX-NEXT:    [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
 ; AVX-NEXT:    ret float [[R]]
 ;
   %e0 = extractelement <8 x float> %x, i32 7
@@ -490,7 +490,7 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]]
 ; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]]
-; CHECK-NEXT:    [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT:    [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
 ; CHECK-NEXT:    ret i32 [[Z0123]]
 ;
   %z = and <4 x i32> %x, %y
@@ -510,7 +510,7 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
 ; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
-; CHECK-NEXT:    [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT:    [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
 ; CHECK-NEXT:    ret i32 [[X210]]
 ;
   %x0 = extractelement <4 x i32> %x, i32 0
@@ -529,7 +529,7 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
 ; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]]
-; CHECK-NEXT:    [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT:    [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
 ; CHECK-NEXT:    ret i32 [[X2Y210]]
 ;
   %y0 = extractelement <4 x i32> %y, i32 0
@@ -573,10 +573,8 @@ define i64 @instsimplify_folder_crash(<4 x i64> %in) {
 ; CHECK-LABEL: @instsimplify_folder_crash(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SHUFFLE_1:%.*]] = shufflevector <4 x i64> [[IN:%.*]], <4 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; CHECK-NEXT:    [[E_0:%.*]] = extractelement <4 x i64> zeroinitializer, i64 0
-; CHECK-NEXT:    [[E_1:%.*]] = extractelement <4 x i64> [[SHUFFLE_1]], i64 1
-; CHECK-NEXT:    [[OR:%.*]] = or i64 [[E_1]], [[E_0]]
-; CHECK-NEXT:    ret i64 [[OR]]
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i64> [[SHUFFLE_1]], <4 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    ret i64 0
 ;
 entry:
   %shuffle.1 = shufflevector <4 x i64> %in, <4 x i64> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll
index 3dae93665b1ed..795832f22b096 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll
@@ -130,7 +130,7 @@ define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) {
 ; AVX-LABEL: @cmp10_v2f64(
 ; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
 ; AVX-NEXT:    [[TMP1:%.*]] = fcmp ule <2 x double> [[SHIFT]], [[Y:%.*]]
-; AVX-NEXT:    [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
+; AVX-NEXT:    [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
 ; AVX-NEXT:    ret i1 [[CMP]]
 ;
   %x1 = extractelement <2 x double> %x, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll
index b26e5ec2698a5..50e32b79a91c2 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-extractelement-scalarization.ll
@@ -27,6 +27,8 @@ define void @multiple_extract(ptr %p) {
 ; infinite loop if we fold an extract that is waiting to be erased
 define void @unused_extract(ptr %p) {
 ; CHECK-LABEL: @unused_extract(
+; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, ptr [[P:%.*]], align 8
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[LOAD]], i64 1
 ; CHECK-NEXT:    ret void
 ;
   %load = load <4 x float>, ptr %p, align 8
diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
index 0c2346e616e36..e8381d1b206e2 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
@@ -253,7 +253,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) %
 ; CHECK-LABEL: @gep01_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
 ; CHECK-NEXT:    [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 2
-; CHECK-NEXT:    ret <8 x i16> [[R]]
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <8 x i16> [[R]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    ret <8 x i16> [[R1]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
   %s = load i16, ptr %gep, align 2
@@ -341,7 +342,8 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %
 ; CHECK-LABEL: @gep10_load_i16_insert_v8i16(
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0
 ; CHECK-NEXT:    [[R:%.*]] = load <8 x i16>, ptr [[GEP]], align 16
-; CHECK-NEXT:    ret <8 x i16> [[R]]
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <8 x i16> [[R]], <8 x i16> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    ret <8 x i16> [[R1]]
 ;
   %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0
   %s = load i16, ptr %gep, align 16

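A usage-level note on the run-loop restructuring (my reading of the patch, not
text from it): once a fold may erase I itself, FoldInst has to stop at the
first successful transform, since invoking a further fold* on an erased
instruction would be a use-after-free. Hence the MadeChange accumulation is
replaced by early returns:

    auto FoldInst = [this](Instruction &I) {
      Builder.SetInsertPoint(&I);
      // Each fold either succeeds -- in which case I may already be erased,
      // so bail out and report a change -- or fails, making it safe to try
      // the next candidate fold on I.
      if (vectorizeLoadInsert(I))
        return true;
      if (widenSubvectorLoad(I))
        return true;
      // ... remaining folds, each guarded the same way ...
      return false;
    };

The outer loops already iterate with make_early_inc_range(BB), so erasing I
inside FoldInst does not invalidate the block traversal.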

