[llvm] 0f8e0f4 - [X86] lowerBuildVectorAsBroadcast - broadcast Constant of original (BuildVector) element size
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat May 27 06:30:44 PDT 2023
Author: Simon Pilgrim
Date: 2023-05-27T14:05:44+01:00
New Revision: 0f8e0f4228805cbecce13dcfadef4c48a4f0f4cd
URL: https://github.com/llvm/llvm-project/commit/0f8e0f4228805cbecce13dcfadef4c48a4f0f4cd
DIFF: https://github.com/llvm/llvm-project/commit/0f8e0f4228805cbecce13dcfadef4c48a4f0f4cd.diff
LOG: [X86] lowerBuildVectorAsBroadcast - broadcast Constant of original (BuildVector) element size
Noticed in D150143/D150526 - we currently create scalar Constant values using the broadcast instruction width, which might be wider than the original build vector width, making it tricky to recognise the original per-element constant data.
If we have widened the broadcast value, it's much more useful for asm comments to create a ConstantVector with the original element data, add that to the constant pool, and load it with the same (wider) broadcast instruction.
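For illustration only (not part of the committed patch), a minimal sketch of the idea using LLVM's Constant APIs - the helper name buildSplatConstant is hypothetical; the committed change does this inside getConstantVector/lowerBuildVectorAsBroadcast:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    // Rebuild a splat constant using the original BuildVector element width,
    // so the constant-pool entry keeps per-element data for asm comments.
    static Constant *buildSplatConstant(const APInt &SplatValue,
                                        unsigned SplatBitSize,
                                        unsigned EltSizeInBits,
                                        LLVMContext &Ctx) {
      // If the splat already matches the element width, a single scalar suffices.
      if (EltSizeInBits == SplatBitSize)
        return Constant::getIntegerValue(Type::getIntNTy(Ctx, SplatBitSize),
                                         SplatValue);

      // Otherwise split the widened splat back into the original elements,
      // e.g. a 64-bit splat of <8 x i8> data becomes the i8 vector
      // <0,1,2,3,4,5,6,7> instead of the opaque i64 506097522914230528.
      SmallVector<Constant *, 16> Elts;
      for (unsigned I = 0, E = SplatBitSize / EltSizeInBits; I != E; ++I) {
        APInt EltVal = SplatValue.extractBits(EltSizeInBits, I * EltSizeInBits);
        Elts.push_back(
            Constant::getIntegerValue(Type::getIntNTy(Ctx, EltSizeInBits), EltVal));
      }
      return ConstantVector::get(Elts);
    }

The resulting ConstantVector is what X86MCInstLower's printConstant can then decode element-by-element, which is why the test diffs below show the broadcast comments switching from wide integers to per-element values.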
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86MCInstLower.cpp
llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
llvm/test/CodeGen/X86/avg.ll
llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
llvm/test/CodeGen/X86/combine-bitselect.ll
llvm/test/CodeGen/X86/dpbusd_const.ll
llvm/test/CodeGen/X86/freeze-vector.ll
llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
llvm/test/CodeGen/X86/i64-to-float.ll
llvm/test/CodeGen/X86/masked_store_trunc.ll
llvm/test/CodeGen/X86/oddshuffles.ll
llvm/test/CodeGen/X86/packss.ll
llvm/test/CodeGen/X86/pr30284.ll
llvm/test/CodeGen/X86/pr62014.ll
llvm/test/CodeGen/X86/psubus.ll
llvm/test/CodeGen/X86/slow-pmulld.ll
llvm/test/CodeGen/X86/vec_uaddo.ll
llvm/test/CodeGen/X86/vec_umulo.ll
llvm/test/CodeGen/X86/vec_usubo.ll
llvm/test/CodeGen/X86/vector-bitreverse.ll
llvm/test/CodeGen/X86/vector-blend.ll
llvm/test/CodeGen/X86/vector-fshl-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll
llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
llvm/test/CodeGen/X86/vector-rotate-512.ll
llvm/test/CodeGen/X86/vector-sext.ll
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
llvm/test/CodeGen/X86/vector-trunc-math.ll
llvm/test/CodeGen/X86/vector-trunc.ll
llvm/test/CodeGen/X86/x86-interleaved-access.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 07518b1447d6e..9b433280d5d9d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7450,6 +7450,14 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
Mask = CFP->getValueAPF().bitcastToAPInt();
return true;
}
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(Cst)) {
+ Type *Ty = CDS->getType();
+ Mask = APInt::getZero(Ty->getPrimitiveSizeInBits());
+ unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
+ Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
+ return true;
+ }
return false;
};
@@ -7511,12 +7519,12 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD &&
EltSizeInBits <= VT.getScalarSizeInBits()) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
- if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits())
+ if (MemIntr->getMemoryVT().getStoreSizeInBits() != VT.getScalarSizeInBits())
return false;
SDValue Ptr = MemIntr->getBasePtr();
if (const Constant *C = getTargetConstantFromBasePtr(Ptr)) {
- unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits();
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
@@ -9695,24 +9703,27 @@ static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,
static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
- unsigned NumElm = SplatBitSize / ScalarSize;
- SmallVector<Constant *, 32> ConstantVec;
- for (unsigned i = 0; i < NumElm; i++) {
- APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
- Constant *Const;
+ auto getConstantScalar = [&](const APInt &Val) -> Constant * {
if (VT.isFloatingPoint()) {
- if (ScalarSize == 16) {
- Const = ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
- } else if (ScalarSize == 32) {
- Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
- } else {
- assert(ScalarSize == 64 && "Unsupported floating point scalar size");
- Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
- }
- } else
- Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
- ConstantVec.push_back(Const);
+ if (ScalarSize == 16)
+ return ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
+ if (ScalarSize == 32)
+ return ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+ assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+ return ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+ }
+ return Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
+ };
+
+ if (ScalarSize == SplatBitSize)
+ return getConstantScalar(SplatValue);
+
+ unsigned NumElm = SplatBitSize / ScalarSize;
+ SmallVector<Constant *, 32> ConstantVec;
+ for (unsigned I = 0; I != NumElm; ++I) {
+ APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * I);
+ ConstantVec.push_back(getConstantScalar(Val));
}
return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
}
@@ -9831,11 +9842,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
if (Subtarget.hasAVX()) {
if (SplatBitSize == 32 || SplatBitSize == 64 ||
(SplatBitSize < 32 && Subtarget.hasAVX2())) {
- // Splatted value can fit in one INTEGER constant in constant pool.
- // Load the constant and broadcast it.
+ // Load the constant scalar/subvector and broadcast it.
MVT CVT = MVT::getIntegerVT(SplatBitSize);
- Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
- Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
+ Constant *C = getConstantVector(VT, SplatValue, SplatBitSize, *Ctx);
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index d10bab2756ec2..2cbc31e20838e 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1530,6 +1530,20 @@ static void printConstant(const Constant *COp, raw_ostream &CS) {
printConstant(CI->getValue(), CS);
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
printConstant(CF->getValueAPF(), CS);
+ } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
+ Type *EltTy = CDS->getElementType();
+ bool IsInteger = EltTy->isIntegerTy();
+ bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ if (I != 0)
+ CS << ",";
+ if (IsInteger)
+ printConstant(CDS->getElementAsAPInt(I), CS);
+ else if (IsFP)
+ printConstant(CDS->getElementAsAPFloat(I), CS);
+ else
+ CS << "?";
+ }
} else {
CS << "?";
}
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index cb8e63539bf81..f64d9aa3280f0 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1022,7 +1022,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512F-LABEL: vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512F-NEXT: vpternlogd $202, (%rdi){1to8}, %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1032,7 +1032,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512DQ-LABEL: vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512DQ-NEXT: vpternlogd $202, (%rdi){1to8}, %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1120,7 +1120,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512F-LABEL: vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpternlogq $202, (%rdi){1to4}, %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1130,7 +1130,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512DQ-LABEL: vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512DQ-NEXT: vpternlogq $202, (%rdi){1to4}, %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -2154,7 +2154,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512F-LABEL: vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512F-NEXT: vpternlogd $202, (%rdi){1to8}, %ymm0, %ymm1
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
@@ -2168,7 +2168,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512DQ-LABEL: vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512DQ-NEXT: vpternlogd $202, (%rdi){1to8}, %ymm0, %ymm1
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
@@ -2387,7 +2387,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
; AVX512F-LABEL: vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpternlogq $202, (%rdi){1to4}, %ymm0, %ymm1
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
@@ -2401,7 +2401,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
; AVX512DQ-LABEL: vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512DQ-NEXT: vpternlogq $202, (%rdi){1to4}, %ymm0, %ymm1
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index 516d3d3312a26..d42b994357447 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -1033,7 +1033,7 @@ define void @avg_v32i8_const(ptr %a) nounwind {
;
; AVX1-LABEL: avg_v32i8_const:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = [506097522914230528,506097522914230528]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX1-NEXT: # xmm0 = mem[0,0]
; AVX1-NEXT: vpavgb (%rdi), %xmm0, %xmm1
; AVX1-NEXT: vpavgb 16(%rdi), %xmm0, %xmm0
@@ -1084,7 +1084,7 @@ define void @avg_v64i8_const(ptr %a) nounwind {
;
; AVX1-LABEL: avg_v64i8_const:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = [506097522914230528,506097522914230528]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX1-NEXT: # xmm0 = mem[0,0]
; AVX1-NEXT: vpavgb (%rdi), %xmm0, %xmm1
; AVX1-NEXT: vpavgb 16(%rdi), %xmm0, %xmm2
@@ -1098,7 +1098,7 @@ define void @avg_v64i8_const(ptr %a) nounwind {
;
; AVX2-LABEL: avg_v64i8_const:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm0 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX2-NEXT: vpavgb (%rdi), %ymm0, %ymm1
; AVX2-NEXT: vpavgb 32(%rdi), %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
@@ -1108,7 +1108,7 @@ define void @avg_v64i8_const(ptr %a) nounwind {
;
; AVX512F-LABEL: avg_v64i8_const:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm0 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX512F-NEXT: vpavgb (%rdi), %ymm0, %ymm1
; AVX512F-NEXT: vpavgb 32(%rdi), %ymm0, %ymm0
; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index 26bdbeb77ccd4..0086d05d1ef9c 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -3665,7 +3665,7 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask1(ptr %vp, <4
define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask2(ptr %vp, <4 x float> %vec2, <4 x float> %mask) {
; CHECK-LABEL: test_masked_16xfloat_to_4xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovddup {{.*#+}} xmm2 = [60129542148,60129542148]
+; CHECK-NEXT: vmovddup {{.*#+}} xmm2 = [4,14,4,14]
; CHECK-NEXT: # xmm2 = mem[0,0]
; CHECK-NEXT: vmovaps 32(%rdi), %ymm3
; CHECK-NEXT: vpermt2ps (%rdi), %ymm2, %ymm3
@@ -3684,7 +3684,7 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask2(ptr %vp, <4 x
define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask2(ptr %vp, <4 x float> %mask) {
; CHECK-LABEL: test_masked_z_16xfloat_to_4xfloat_perm_mem_mask2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovddup {{.*#+}} xmm2 = [60129542148,60129542148]
+; CHECK-NEXT: vmovddup {{.*#+}} xmm2 = [4,14,4,14]
; CHECK-NEXT: # xmm2 = mem[0,0]
; CHECK-NEXT: vmovaps 32(%rdi), %ymm1
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 94ba97f79a17c..a995e93708456 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -157,7 +157,7 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -167,7 +167,7 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -355,7 +355,7 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
@@ -367,7 +367,7 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -618,7 +618,7 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
-; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
@@ -640,7 +640,7 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index d8fb933ea3678..a08200fde8e78 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -199,7 +199,7 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -211,7 +211,7 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
@@ -454,7 +454,7 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
@@ -471,7 +471,7 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
@@ -807,7 +807,7 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
-; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
@@ -838,7 +838,7 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
index 7466a9379e9eb..aba2f2e171b01 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -204,7 +204,7 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -216,7 +216,7 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
@@ -249,7 +249,7 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
@@ -266,7 +266,7 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
index e8b4390fe2420..c0dc8033710ed 100644
--- a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
+++ b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
@@ -26,7 +26,7 @@ define <16 x i8> @f16xi8_i16(<16 x i8> %a) {
;
; ALL32-LABEL: f16xi8_i16:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastw {{.*#+}} xmm1 = [256,256,256,256,256,256,256,256]
+; ALL32-NEXT: vpbroadcastw {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
@@ -40,7 +40,7 @@ define <16 x i8> @f16xi8_i16(<16 x i8> %a) {
;
; ALL64-LABEL: f16xi8_i16:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastw {{.*#+}} xmm1 = [256,256,256,256,256,256,256,256]
+; ALL64-NEXT: vpbroadcastw {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
@@ -53,28 +53,28 @@ define <16 x i8> @f16xi8_i16(<16 x i8> %a) {
define <16 x i8> @f16xi8_i32(<16 x i8> %a) {
; AVX-LABEL: f16xi8_i32:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f16xi8_i32:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976]
+; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f16xi8_i32:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976]
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f16xi8_i32:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976]
+; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
@@ -87,7 +87,7 @@ define <16 x i8> @f16xi8_i32(<16 x i8> %a) {
define <16 x i8> @f16xi8_i64(<16 x i8> %a) {
; AVX-LABEL: f16xi8_i64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [506097522914230528,506097522914230528]
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -95,14 +95,14 @@ define <16 x i8> @f16xi8_i64(<16 x i8> %a) {
;
; ALL32-LABEL: f16xi8_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [506097522914230528,506097522914230528]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f16xi8_i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [506097522914230528,506097522914230528]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-64-NEXT: # xmm1 = mem[0,0]
; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -110,7 +110,7 @@ define <16 x i8> @f16xi8_i64(<16 x i8> %a) {
;
; ALL64-LABEL: f16xi8_i64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [506097522914230528,506097522914230528]
+; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
@@ -133,7 +133,7 @@ define <32 x i8> @f32xi8_i16(<32 x i8> %a) {
;
; ALL32-LABEL: f32xi8_i16:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastw {{.*#+}} ymm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
+; ALL32-NEXT: vpbroadcastw {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
@@ -150,7 +150,7 @@ define <32 x i8> @f32xi8_i16(<32 x i8> %a) {
;
; ALL64-LABEL: f32xi8_i16:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastw {{.*#+}} ymm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
+; ALL64-NEXT: vpbroadcastw {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
@@ -164,7 +164,7 @@ define <32 x i8> @f32xi8_i32(<32 x i8> %a) {
; AVX-LABEL: f32xi8_i32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [50462976,50462976,50462976,50462976]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -173,7 +173,7 @@ define <32 x i8> @f32xi8_i32(<32 x i8> %a) {
;
; ALL32-LABEL: f32xi8_i32:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
@@ -181,7 +181,7 @@ define <32 x i8> @f32xi8_i32(<32 x i8> %a) {
; AVX-64-LABEL: f32xi8_i32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [50462976,50462976,50462976,50462976]
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -190,7 +190,7 @@ define <32 x i8> @f32xi8_i32(<32 x i8> %a) {
;
; ALL64-LABEL: f32xi8_i32:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
@@ -204,7 +204,7 @@ define <32 x i8> @f32xi8_i64(<32 x i8> %a) {
; AVX-LABEL: f32xi8_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [506097522914230528,506097522914230528]
+; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-NEXT: # xmm2 = mem[0,0]
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
@@ -214,7 +214,7 @@ define <32 x i8> @f32xi8_i64(<32 x i8> %a) {
;
; ALL32-LABEL: f32xi8_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
@@ -222,7 +222,7 @@ define <32 x i8> @f32xi8_i64(<32 x i8> %a) {
; AVX-64-LABEL: f32xi8_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [506097522914230528,506097522914230528]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-64-NEXT: # xmm2 = mem[0,0]
; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0
@@ -232,7 +232,7 @@ define <32 x i8> @f32xi8_i64(<32 x i8> %a) {
;
; ALL64-LABEL: f32xi8_i64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
@@ -303,7 +303,7 @@ define <64 x i8> @f64xi8_i16(<64 x i8> %a) {
;
; AVX2-LABEL: f64xi8_i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -312,7 +312,7 @@ define <64 x i8> @f64xi8_i16(<64 x i8> %a) {
;
; AVX512BW-LABEL: f64xi8_i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
+; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
@@ -335,7 +335,7 @@ define <64 x i8> @f64xi8_i16(<64 x i8> %a) {
;
; AVX2-64-LABEL: f64xi8_i16:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vpbroadcastw {{.*#+}} ymm2 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
+; AVX2-64-NEXT: vpbroadcastw {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -344,7 +344,7 @@ define <64 x i8> @f64xi8_i16(<64 x i8> %a) {
;
; AVX512BW-64-LABEL: f64xi8_i16:
; AVX512BW-64: # %bb.0:
-; AVX512BW-64-NEXT: vpbroadcastw {{.*#+}} zmm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256]
+; AVX512BW-64-NEXT: vpbroadcastw {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
@@ -357,7 +357,7 @@ define <64 x i8> @f64xi8_i16(<64 x i8> %a) {
define <64 x i8> @f64i8_i32(<64 x i8> %a) {
; AVX-LABEL: f64i8_i32:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
@@ -372,7 +372,7 @@ define <64 x i8> @f64i8_i32(<64 x i8> %a) {
;
; AVX2-LABEL: f64i8_i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -381,14 +381,14 @@ define <64 x i8> @f64i8_i32(<64 x i8> %a) {
;
; AVX512BW-LABEL: f64i8_i32:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f64i8_i32:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3
; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
@@ -403,7 +403,7 @@ define <64 x i8> @f64i8_i32(<64 x i8> %a) {
;
; AVX2-64-LABEL: f64i8_i32:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX2-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -412,7 +412,7 @@ define <64 x i8> @f64i8_i32(<64 x i8> %a) {
;
; AVX512BW-64-LABEL: f64i8_i32:
; AVX512BW-64: # %bb.0:
-; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976]
+; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
@@ -425,7 +425,7 @@ define <64 x i8> @f64i8_i32(<64 x i8> %a) {
define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
; AVX-LABEL: f64xi8_i64:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3
; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
@@ -440,7 +440,7 @@ define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
;
; AVX2-LABEL: f64xi8_i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -449,14 +449,14 @@ define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
;
; AVX512BW-LABEL: f64xi8_i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f64xi8_i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3
; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1
@@ -471,7 +471,7 @@ define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
;
; AVX2-64-LABEL: f64xi8_i64:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX2-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -480,7 +480,7 @@ define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
;
; AVX512BW-64-LABEL: f64xi8_i64:
; AVX512BW-64: # %bb.0:
-; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528]
+; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
@@ -641,28 +641,28 @@ define <64 x i8> @f64xi8_i256(<64 x i8> %a) {
define <8 x i16> @f8xi16_i32(<8 x i16> %a) {
; AVX-LABEL: f8xi16_i32:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [65536,65536,65536,65536]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1]
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f8xi16_i32:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65536,65536,65536,65536]
+; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1]
; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xi16_i32:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [65536,65536,65536,65536]
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1]
; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f8xi16_i32:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65536,65536,65536,65536]
+; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1]
; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
@@ -675,7 +675,7 @@ define <8 x i16> @f8xi16_i32(<8 x i16> %a) {
define <8 x i16> @f8xi16_i64(<8 x i16> %a) {
; AVX-LABEL: f8xi16_i64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [844433520132096,844433520132096]
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -683,14 +683,14 @@ define <8 x i16> @f8xi16_i64(<8 x i16> %a) {
;
; ALL32-LABEL: f8xi16_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [844433520132096,844433520132096]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3]
; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xi16_i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [844433520132096,844433520132096]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3]
; AVX-64-NEXT: # xmm1 = mem[0,0]
; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -698,7 +698,7 @@ define <8 x i16> @f8xi16_i64(<8 x i16> %a) {
;
; ALL64-LABEL: f8xi16_i64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [844433520132096,844433520132096]
+; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3]
; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
@@ -712,7 +712,7 @@ define <16 x i16> @f16xi16_i32(<16 x i16> %a) {
; AVX-LABEL: f16xi16_i32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [65536,65536,65536,65536]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -721,7 +721,7 @@ define <16 x i16> @f16xi16_i32(<16 x i16> %a) {
;
; ALL32-LABEL: f16xi16_i32:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
@@ -729,7 +729,7 @@ define <16 x i16> @f16xi16_i32(<16 x i16> %a) {
; AVX-64-LABEL: f16xi16_i32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [65536,65536,65536,65536]
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1]
; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@@ -738,7 +738,7 @@ define <16 x i16> @f16xi16_i32(<16 x i16> %a) {
;
; ALL64-LABEL: f16xi16_i32:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
@@ -752,7 +752,7 @@ define <16 x i16> @f16xi16_i64(<16 x i16> %a) {
; AVX-LABEL: f16xi16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [844433520132096,844433520132096]
+; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,2,3,0,1,2,3]
; AVX-NEXT: # xmm2 = mem[0,0]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
@@ -762,7 +762,7 @@ define <16 x i16> @f16xi16_i64(<16 x i16> %a) {
;
; ALL32-LABEL: f16xi16_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [844433520132096,844433520132096,844433520132096,844433520132096]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
@@ -770,7 +770,7 @@ define <16 x i16> @f16xi16_i64(<16 x i16> %a) {
; AVX-64-LABEL: f16xi16_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [844433520132096,844433520132096]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,2,3,0,1,2,3]
; AVX-64-NEXT: # xmm2 = mem[0,0]
; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
@@ -780,7 +780,7 @@ define <16 x i16> @f16xi16_i64(<16 x i16> %a) {
;
; ALL64-LABEL: f16xi16_i64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [844433520132096,844433520132096,844433520132096,844433520132096]
+; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
@@ -835,7 +835,7 @@ define <16 x i16> @f16xi16_i128(<16 x i16> %a) {
define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
; AVX-LABEL: f32xi16_i32:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
@@ -850,7 +850,7 @@ define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
;
; AVX2-LABEL: f32xi16_i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -859,14 +859,14 @@ define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
;
; AVX512BW-LABEL: f32xi16_i32:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536]
+; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f32xi16_i32:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3
; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
@@ -881,7 +881,7 @@ define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
;
; AVX2-64-LABEL: f32xi16_i32:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
+; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX2-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -890,7 +890,7 @@ define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
;
; AVX512BW-64-LABEL: f32xi16_i32:
; AVX512BW-64: # %bb.0:
-; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536]
+; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
@@ -903,7 +903,7 @@ define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
; AVX-LABEL: f32xi16_i64:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [844433520132096,844433520132096,844433520132096,844433520132096]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
@@ -918,7 +918,7 @@ define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
;
; AVX2-LABEL: f32xi16_i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [844433520132096,844433520132096,844433520132096,844433520132096]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -927,14 +927,14 @@ define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
;
; AVX512BW-LABEL: f32xi16_i64:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f32xi16_i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [844433520132096,844433520132096,844433520132096,844433520132096]
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3
; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
@@ -949,7 +949,7 @@ define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
;
; AVX2-64-LABEL: f32xi16_i64:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [844433520132096,844433520132096,844433520132096,844433520132096]
+; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX2-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -958,7 +958,7 @@ define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
;
; AVX512BW-64-LABEL: f32xi16_i64:
; AVX512BW-64: # %bb.0:
-; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096]
+; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
@@ -1119,7 +1119,7 @@ define <32 x i16> @f32xi16_i256(<32 x i16> %a) {
define <4 x i32> @f4xi32_i64(<4 x i32> %a) {
; AVX-LABEL: f4xi32_i64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4294967296,4294967296]
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,0,1]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -1127,14 +1127,14 @@ define <4 x i32> @f4xi32_i64(<4 x i32> %a) {
;
; ALL32-LABEL: f4xi32_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967296,4294967296]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,0,1]
; ALL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f4xi32_i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [4294967296,4294967296]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,0,1]
; AVX-64-NEXT: # xmm1 = mem[0,0]
; AVX-64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -1142,7 +1142,7 @@ define <4 x i32> @f4xi32_i64(<4 x i32> %a) {
;
; ALL64-LABEL: f4xi32_i64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967296,4294967296]
+; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,0,1]
; ALL64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
@@ -1156,7 +1156,7 @@ define <8 x i32> @f8xi32_i64(<8 x i32> %a) {
; AVX-LABEL: f8xi32_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [4294967296,4294967296]
+; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,0,1]
; AVX-NEXT: # xmm2 = mem[0,0]
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
@@ -1166,7 +1166,7 @@ define <8 x i32> @f8xi32_i64(<8 x i32> %a) {
;
; ALL32-LABEL: f8xi32_i64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967296,4294967296,4294967296,4294967296]
+; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1]
; ALL32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
@@ -1174,7 +1174,7 @@ define <8 x i32> @f8xi32_i64(<8 x i32> %a) {
; AVX-64-LABEL: f8xi32_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [4294967296,4294967296]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,0,1]
; AVX-64-NEXT: # xmm2 = mem[0,0]
; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
@@ -1184,7 +1184,7 @@ define <8 x i32> @f8xi32_i64(<8 x i32> %a) {
;
; ALL64-LABEL: f8xi32_i64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967296,4294967296,4294967296,4294967296]
+; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1]
; ALL64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
@@ -1239,7 +1239,7 @@ define <8 x i32> @f8xi32_i128(<8 x i32> %a) {
define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
; AVX-LABEL: f16xi32_i64:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-NEXT: vpaddd %xmm2, %xmm3, %xmm3
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
@@ -1254,7 +1254,7 @@ define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
;
; AVX2-LABEL: f16xi32_i64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -1263,14 +1263,14 @@ define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
;
; AVX512-LABEL: f16xi32_i64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296]
+; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f16xi32_i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX-64-NEXT: vpaddd %xmm2, %xmm3, %xmm3
; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
@@ -1285,7 +1285,7 @@ define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
;
; AVX2-64-LABEL: f16xi32_i64:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
+; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
; AVX2-64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
@@ -1294,7 +1294,7 @@ define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
;
; AVX512F-64-LABEL: f16xi32_i64:
; AVX512F-64: # %bb.0:
-; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296]
+; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: retq
@@ -1572,7 +1572,7 @@ define <8 x i64> @f8xi64_i256(<8 x i64> %a) {
define <4 x float> @f4xf32_f64(<4 x float> %a) {
; AVX-LABEL: f4xf32_f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
+; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-NEXT: # xmm1 = mem[0,0]
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0
@@ -1580,7 +1580,7 @@ define <4 x float> @f4xf32_f64(<4 x float> %a) {
;
; ALL32-LABEL: f4xf32_f64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
+; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; ALL32-NEXT: # xmm1 = mem[0,0]
; ALL32-NEXT: vaddps %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vdivps %xmm0, %xmm1, %xmm0
@@ -1588,7 +1588,7 @@ define <4 x float> @f4xf32_f64(<4 x float> %a) {
;
; AVX-64-LABEL: f4xf32_f64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-64-NEXT: # xmm1 = mem[0,0]
; AVX-64-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vdivps %xmm0, %xmm1, %xmm0
@@ -1596,7 +1596,7 @@ define <4 x float> @f4xf32_f64(<4 x float> %a) {
;
; ALL64-LABEL: f4xf32_f64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vmovddup {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760]
+; ALL64-NEXT: vmovddup {{.*#+}} xmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; ALL64-NEXT: # xmm1 = mem[0,0]
; ALL64-NEXT: vaddps %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vdivps %xmm0, %xmm1, %xmm0
@@ -1610,28 +1610,28 @@ define <4 x float> @f4xf32_f64(<4 x float> %a) {
define <8 x float> @f8xf32_f64(<8 x float> %a) {
; AVX-LABEL: f8xf32_f64:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f8xf32_f64:
; ALL32: # %bb.0:
-; ALL32-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; ALL32-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xf32_f64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f8xf32_f64:
; ALL64: # %bb.0:
-; ALL64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; ALL64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0
; ALL64-NEXT: retq
@@ -1682,7 +1682,7 @@ define <8 x float> @f8xf32_f128(<8 x float> %a) {
define <16 x float> @f16xf32_f64(<16 x float> %a) {
; AVX-LABEL: f16xf32_f64:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0
@@ -1691,7 +1691,7 @@ define <16 x float> @f16xf32_f64(<16 x float> %a) {
;
; AVX2-LABEL: f16xf32_f64:
; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0
@@ -1700,14 +1700,14 @@ define <16 x float> @f16xf32_f64(<16 x float> %a) {
;
; AVX512-LABEL: f16xf32_f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f16xf32_f64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
@@ -1716,7 +1716,7 @@ define <16 x float> @f16xf32_f64(<16 x float> %a) {
;
; AVX2-64-LABEL: f16xf32_f64:
; AVX2-64: # %bb.0:
-; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0
@@ -1725,7 +1725,7 @@ define <16 x float> @f16xf32_f64(<16 x float> %a) {
;
; AVX512F-64-LABEL: f16xf32_f64:
; AVX512F-64: # %bb.0:
-; AVX512F-64-NEXT: vbroadcastsd {{.*#+}} zmm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760]
+; AVX512F-64-NEXT: vbroadcastsd {{.*#+}} zmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0]
; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0
; AVX512F-64-NEXT: retq
@@ -2023,28 +2023,28 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) {
; AVX-LABEL: f8xi16_i32_NaN:
; AVX: # %bb.0:
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776]
+; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,65466,0,65466,0,65466,0,65466]
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f8xi16_i32_NaN:
; ALL32: # %bb.0:
-; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776]
+; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,65466,0,65466,0,65466,0,65466]
; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xi16_i32_NaN:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776]
+; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,65466,0,65466,0,65466,0,65466]
; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f8xi16_i32_NaN:
; ALL64: # %bb.0:
-; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776]
+; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,65466,0,65466,0,65466,0,65466]
; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll
index 43fd9243ea579..23f56d908cb2e 100644
--- a/llvm/test/CodeGen/X86/combine-bitselect.ll
+++ b/llvm/test/CodeGen/X86/combine-bitselect.ll
@@ -1123,7 +1123,7 @@ define void @constantfold_andn_mask() nounwind {
; AVX2-NEXT: pushq %rax
; AVX2-NEXT: callq use@PLT
; AVX2-NEXT: vmovdqu (%rax), %xmm1
-; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpavgb %xmm2, %xmm0, %xmm0
@@ -1141,7 +1141,7 @@ define void @constantfold_andn_mask() nounwind {
; AVX512F-NEXT: pushq %rax
; AVX512F-NEXT: callq use@PLT
; AVX512F-NEXT: vmovdqu (%rax), %xmm1
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpavgb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpandn %xmm1, %xmm0, %xmm0
@@ -1158,7 +1158,7 @@ define void @constantfold_andn_mask() nounwind {
; AVX512VL-NEXT: pushq %rax
; AVX512VL-NEXT: callq use@PLT
; AVX512VL-NEXT: vmovdqu (%rax), %xmm1
-; AVX512VL-NEXT: vpbroadcastw {{.*#+}} xmm2 = [63519,63519,63519,63519,63519,63519,63519,63519]
+; AVX512VL-NEXT: vpbroadcastw {{.*#+}} xmm2 = [31,248,31,248,31,248,31,248,31,248,31,248,31,248,31,248]
; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpavgb %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpandn %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/dpbusd_const.ll b/llvm/test/CodeGen/X86/dpbusd_const.ll
index 7ccbcd4d0d699..29d8e58848588 100644
--- a/llvm/test/CodeGen/X86/dpbusd_const.ll
+++ b/llvm/test/CodeGen/X86/dpbusd_const.ll
@@ -267,7 +267,7 @@ entry:
define i32 @mul_64xi8_zc(<64 x i8> %a, i32 %c) {
; AVXVNNI-LABEL: mul_64xi8_zc:
; AVXVNNI: # %bb.0: # %entry
-; AVXVNNI-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1073873152,1073873152,1073873152,1073873152,1073873152,1073873152,1073873152,1073873152]
+; AVXVNNI-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,2,64,0,1,2,64,0,1,2,64,0,1,2,64,0,1,2,64,0,1,2,64,0,1,2,64,0,1,2,64]
; AVXVNNI-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVXVNNI-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVXVNNI-NEXT: {vex} vpdpbusd %ymm2, %ymm1, %ymm4
diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index 45587f1f33ece..93d6a9f3fc9a5 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -447,7 +447,7 @@ define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, p
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%edx), %edx
; X86-NEXT: andl $15, %edx
-; X86-NEXT: vmovddup {{.*#+}} xmm0 = [7,7]
+; X86-NEXT: vmovddup {{.*#+}} xmm0 = [7,0,7,0]
; X86-NEXT: # xmm0 = mem[0,0]
; X86-NEXT: vmovd %edx, %xmm1
; X86-NEXT: vpand %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
index 645d9438a2224..0c59e880dfd37 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -53,7 +53,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm2
@@ -66,7 +66,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v2i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
@@ -525,7 +525,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4
@@ -544,7 +544,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
@@ -1220,7 +1220,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
;
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1: ## %bb.0:
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4
@@ -1248,7 +1248,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2: ## %bb.0:
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
index 8070b035f8866..b64b0bf244139 100644
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -54,7 +54,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm2
@@ -67,7 +67,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v2i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
@@ -468,7 +468,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: ## xmm1 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
@@ -488,7 +488,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
@@ -1137,7 +1137,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm3, %xmm4
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
@@ -1164,7 +1164,7 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2: ## %bb.0:
-; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
diff --git a/llvm/test/CodeGen/X86/i64-to-float.ll b/llvm/test/CodeGen/X86/i64-to-float.ll
index b5974f03563fc..b38f5c21e2c01 100644
--- a/llvm/test/CodeGen/X86/i64-to-float.ll
+++ b/llvm/test/CodeGen/X86/i64-to-float.ll
@@ -291,11 +291,11 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
;
; X86-AVX-LABEL: clamp_sitofp_2i64_2f64:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [18446744073709551361,18446744073709551361]
+; X86-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [4294967041,4294967295,4294967041,4294967295]
; X86-AVX-NEXT: # xmm1 = mem[0,0]
; X86-AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [255,255]
+; X86-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [255,0,255,0]
; X86-AVX-NEXT: # xmm1 = mem[0,0]
; X86-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc.ll b/llvm/test/CodeGen/X86/masked_store_trunc.ll
index 401d6e810ea2c..b756165172650 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc.ll
@@ -842,7 +842,7 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
; AVX2-LABEL: truncstore_v8i64_v8i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -3039,7 +3039,7 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, ptr %p, <16 x i32> %mask) {
; AVX2-LABEL: truncstore_v16i32_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm5, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -5127,7 +5127,7 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, ptr %p, <32 x i8> %mask) {
; AVX2-LABEL: truncstore_v32i16_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll
index 49911afc0123d..e17e9d3a6573e 100644
--- a/llvm/test/CodeGen/X86/oddshuffles.ll
+++ b/llvm/test/CodeGen/X86/oddshuffles.ll
@@ -1504,7 +1504,7 @@ define void @interleave_24i32_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-SLOW-NEXT: vmovups (%rdi), %ymm0
; AVX2-SLOW-NEXT: vmovups 32(%rdi), %ymm1
; AVX2-SLOW-NEXT: vmovups 64(%rdi), %ymm2
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm3 = [2,5,2,5,2,5,2,5]
; AVX2-SLOW-NEXT: vpermps %ymm2, %ymm3, %ymm3
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm5 = <0,3,6,1,4,7,u,u>
@@ -1534,7 +1534,7 @@ define void @interleave_24i32_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-FAST-ALL-NEXT: vmovups (%rdi), %ymm0
; AVX2-FAST-ALL-NEXT: vmovups 32(%rdi), %ymm1
; AVX2-FAST-ALL-NEXT: vmovups 64(%rdi), %ymm2
-; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm3 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-ALL-NEXT: vpermps %ymm2, %ymm3, %ymm3
; AVX2-FAST-ALL-NEXT: vblendps {{.*#+}} ymm4 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm5 = <0,3,6,1,4,7,u,u>
@@ -1564,7 +1564,7 @@ define void @interleave_24i32_out(ptr %p, ptr %q1, ptr %q2, ptr %q3) nounwind {
; AVX2-FAST-PERLANE-NEXT: vmovups (%rdi), %ymm0
; AVX2-FAST-PERLANE-NEXT: vmovups 32(%rdi), %ymm1
; AVX2-FAST-PERLANE-NEXT: vmovups 64(%rdi), %ymm2
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm3 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm2, %ymm3, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm4 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm5 = <0,3,6,1,4,7,u,u>
diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll
index 821d233b82376..341bac87d47af 100644
--- a/llvm/test/CodeGen/X86/packss.ll
+++ b/llvm/test/CodeGen/X86/packss.ll
@@ -174,7 +174,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,1]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,0,1,0]
; X86-AVX1-NEXT: # xmm2 = mem[0,0]
; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
diff --git a/llvm/test/CodeGen/X86/pr30284.ll b/llvm/test/CodeGen/X86/pr30284.ll
index e9a07f7354e65..f4fb1b3ce72e3 100644
--- a/llvm/test/CodeGen/X86/pr30284.ll
+++ b/llvm/test/CodeGen/X86/pr30284.ll
@@ -21,7 +21,7 @@ define void @f_f___un_3C_unf_3E_un_3C_unf_3E_(<16 x i1> %x) {
; CHECK-NEXT: vpmovd2m %zmm0, %k1
; CHECK-NEXT: vmovapd 0, %zmm0
; CHECK-NEXT: vmovapd 64, %zmm1
-; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm2 = [68719476736,68719476736,68719476736,68719476736,68719476736,68719476736,68719476736,68719476736]
+; CHECK-NEXT: vbroadcastsd {{.*#+}} zmm2 = [0,16,0,16,0,16,0,16,0,16,0,16,0,16,0,16]
; CHECK-NEXT: kshiftrw $8, %k1, %k2
; CHECK-NEXT: vorpd %zmm2, %zmm1, %zmm1 {%k2}
; CHECK-NEXT: vorpd %zmm2, %zmm0, %zmm0 {%k1}
diff --git a/llvm/test/CodeGen/X86/pr62014.ll b/llvm/test/CodeGen/X86/pr62014.ll
index e8e15d016f0a9..a2e01d44f2bfb 100644
--- a/llvm/test/CodeGen/X86/pr62014.ll
+++ b/llvm/test/CodeGen/X86/pr62014.ll
@@ -237,7 +237,7 @@ define <16 x i8> @select_cast_cond_multiuse_v16i8(<16 x i8> %x, <16 x i8> %y, i1
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index ac0037ab15003..55e2342e8b0e6 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -978,7 +978,7 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm5
; AVX2-NEXT: vpackssdw %xmm5, %xmm3, %xmm3
; AVX2-NEXT: vpacksswb %xmm3, %xmm4, %xmm3
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX2-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll
index b6706f9c46005..99024f6bba218 100644
--- a/llvm/test/CodeGen/X86/slow-pmulld.ll
+++ b/llvm/test/CodeGen/X86/slow-pmulld.ll
@@ -249,7 +249,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-SLOW-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX2-SLOW-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0]
; AVX2-SLOW-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpmaddwd %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT: ret{{[l|q]}}
@@ -259,7 +259,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
; AVX2-32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-32-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-32-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0]
; AVX2-32-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0
; AVX2-32-NEXT: vpmaddwd %ymm2, %ymm1, %ymm1
; AVX2-32-NEXT: retl
@@ -269,7 +269,7 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
; AVX2-64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0]
; AVX2-64-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpmaddwd %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: retq
@@ -769,7 +769,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-SLOW-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX2-SLOW-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0]
; AVX2-SLOW-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpmaddwd %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT: ret{{[l|q]}}
@@ -779,7 +779,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
; AVX2-32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-32-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-32-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0]
; AVX2-32-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0
; AVX2-32-NEXT: vpmaddwd %ymm2, %ymm1, %ymm1
; AVX2-32-NEXT: retl
@@ -789,7 +789,7 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
; AVX2-64-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,18778,18778,18778,18778,18778,18778,18778]
+; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0]
; AVX2-64-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpmaddwd %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 7c5ef84ecb19b..32d2332fd3839 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -978,7 +978,7 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX1-LABEL: uaddo_v4i24:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
@@ -1006,7 +1006,7 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX2-LABEL: uaddo_v4i24:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
@@ -1034,7 +1034,7 @@ define <4 x i32> @uaddo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: uaddo_v4i24:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index add3710bedee3..e792fb9a8b271 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -2745,7 +2745,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX1-LABEL: umulo_v4i24:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm1[1,1,3,3]
@@ -2782,7 +2782,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX2-LABEL: umulo_v4i24:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
@@ -2819,7 +2819,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: umulo_v4i24:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index 2ccd3856ab7c8..6f63236206e0c 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -1025,7 +1025,7 @@ define <4 x i32> @usubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX1-LABEL: usubo_v4i24:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm1
@@ -1053,7 +1053,7 @@ define <4 x i32> @usubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX2-LABEL: usubo_v4i24:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm1
@@ -1081,7 +1081,7 @@ define <4 x i32> @usubo_v4i24(<4 x i24> %a0, <4 x i24> %a1, ptr %p2) nounwind {
;
; AVX512-LABEL: usubo_v4i24:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [16777215,16777215,16777215,16777215]
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index b592a60cd44eb..3d98cc95ad05c 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -1723,7 +1723,7 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind {
;
; GFNIAVX1-LABEL: test_bitreverse_v64i8:
; GFNIAVX1: # %bb.0:
-; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
@@ -2036,7 +2036,7 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
@@ -2384,7 +2384,7 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
@@ -2740,7 +2740,7 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; GFNIAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; GFNIAVX1-NEXT: vgf2p8affineqb $0, %ymm2, %ymm0, %ymm0
; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; GFNIAVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 53578cf6ce17a..2271db9d64038 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -619,7 +619,7 @@ define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [18374686483949879295,18374686483949879295,18374686483949879295,18374686483949879295]
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index bdec9cdf91a64..3ffd137921d7d 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -544,7 +544,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vpsrlw $1, %ymm5, %ymm5
@@ -562,7 +562,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512VL-NEXT: vpsrlw $1, %ymm5, %ymm5
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index daf4af02727cb..dd9689676edb9 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -300,7 +300,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5
@@ -317,7 +317,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 0bbc09dc11bd2..9630cc7876f6e 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -546,7 +546,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y, <16 x i
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
@@ -564,7 +564,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,15]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm3 = [15,0,0,0,15,0,0,0]
; AVX512VL-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm5
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index e673b1678baea..a1abdd9565af4 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -298,7 +298,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
; AVX512F-LABEL: splatvar_funnnel_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
@@ -315,7 +315,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
;
; AVX512VL-LABEL: splatvar_funnnel_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm5
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
index 701a69c452782..e5674bc467e0d 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll
@@ -2404,7 +2404,7 @@ define void @load_i16_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm12 = xmm13[0],xmm12[1],xmm13[2,3,4,5],xmm12[6],xmm13[7]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm12 = xmm12[0,1,2,3,0,1,14,15,12,13,10,11,u,u,u,u]
; AVX2-FAST-NEXT: vinserti128 $1, %xmm12, %ymm0, %ymm12
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm13 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm13 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-NEXT: vpermd %ymm7, %ymm13, %ymm13
; AVX2-FAST-NEXT: vpshufhw {{.*#+}} ymm13 = ymm13[0,1,2,3,4,5,4,7,8,9,10,11,12,13,12,15]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm12 = ymm12[0,1,2,3,4,5,6],ymm13[7]
@@ -2890,7 +2890,7 @@ define void @load_i16_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-FAST-NEXT: vpblendw {{.*#+}} xmm13 = xmm14[0],xmm13[1],xmm14[2,3,4,5],xmm13[6],xmm14[7]
; AVX512F-FAST-NEXT: vpshufb {{.*#+}} xmm13 = xmm13[0,1,2,3,0,1,14,15,12,13,10,11,u,u,u,u]
; AVX512F-FAST-NEXT: vinserti128 $1, %xmm13, %ymm0, %ymm13
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm14 = [21474836482,21474836482,21474836482,21474836482]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} ymm14 = [2,5,2,5,2,5,2,5]
; AVX512F-FAST-NEXT: vpermd %ymm2, %ymm14, %ymm14
; AVX512F-FAST-NEXT: vpshufhw {{.*#+}} ymm14 = ymm14[0,1,2,3,4,5,4,7,8,9,10,11,12,13,12,15]
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3,4,5,6],ymm14[7]
@@ -5078,7 +5078,7 @@ define void @load_i16_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,0,1,14,15,12,13,10,11,12,13,14,15]
; AVX2-FAST-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm4 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm4 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-NEXT: vmovdqu %ymm7, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-FAST-NEXT: vpermd %ymm7, %ymm4, %ymm11
; AVX2-FAST-NEXT: vpshufhw {{.*#+}} ymm11 = ymm11[0,1,2,3,4,5,4,7,8,9,10,11,12,13,12,15]
@@ -6184,7 +6184,7 @@ define void @load_i16_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-ONLY-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5],xmm0[6],xmm2[7]
; AVX512F-ONLY-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,14,15,12,13,10,11,u,u,u,u]
; AVX512F-ONLY-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-ONLY-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [21474836482,21474836482,21474836482,21474836482]
+; AVX512F-ONLY-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,5,2,5,2,5,2,5]
; AVX512F-ONLY-FAST-NEXT: vpermd %ymm28, %ymm2, %ymm2
; AVX512F-ONLY-FAST-NEXT: vpshufhw {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5,4,7,8,9,10,11,12,13,12,15]
; AVX512F-ONLY-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7]
@@ -6851,7 +6851,7 @@ define void @load_i16_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512DQ-FAST-NEXT: vpblendw {{.*#+}} xmm6 = xmm11[0],xmm6[1],xmm11[2,3,4,5],xmm6[6],xmm11[7]
; AVX512DQ-FAST-NEXT: vpshufb {{.*#+}} xmm6 = xmm6[0,1,2,3,0,1,14,15,12,13,10,11,u,u,u,u]
; AVX512DQ-FAST-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
-; AVX512DQ-FAST-NEXT: vpbroadcastq {{.*#+}} ymm11 = [21474836482,21474836482,21474836482,21474836482]
+; AVX512DQ-FAST-NEXT: vpbroadcastq {{.*#+}} ymm11 = [2,5,2,5,2,5,2,5]
; AVX512DQ-FAST-NEXT: vpermd %ymm27, %ymm11, %ymm11
; AVX512DQ-FAST-NEXT: vpshufhw {{.*#+}} ymm11 = ymm11[0,1,2,3,4,5,4,7,8,9,10,11,12,13,12,15]
; AVX512DQ-FAST-NEXT: vpblendd {{.*#+}} ymm11 = ymm6[0,1,2,3,4,5,6],ymm11[7]
@@ -11300,7 +11300,7 @@ define void @load_i16_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,0,1,14,15,12,13,10,11,12,13,14,15]
; AVX2-FAST-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload
; AVX2-FAST-NEXT: vpermd %ymm5, %ymm2, %ymm3
; AVX2-FAST-NEXT: vpshufhw {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5,4,7,8,9,10,11,12,13,12,15]
@@ -13645,7 +13645,7 @@ define void @load_i16_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512F-ONLY-FAST-NEXT: vmovdqa {{.*#+}} xmm8 = [0,1,2,3,0,1,14,15,12,13,10,11,12,13,14,15]
; AVX512F-ONLY-FAST-NEXT: vpshufb %xmm8, %xmm4, %xmm4
; AVX512F-ONLY-FAST-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
-; AVX512F-ONLY-FAST-NEXT: vpbroadcastq {{.*#+}} ymm11 = [21474836482,21474836482,21474836482,21474836482]
+; AVX512F-ONLY-FAST-NEXT: vpbroadcastq {{.*#+}} ymm11 = [2,5,2,5,2,5,2,5]
; AVX512F-ONLY-FAST-NEXT: vpermd %ymm18, %ymm11, %ymm12
; AVX512F-ONLY-FAST-NEXT: vpshufhw {{.*#+}} ymm12 = ymm12[0,1,2,3,4,5,4,7,8,9,10,11,12,13,12,15]
; AVX512F-ONLY-FAST-NEXT: vpblendd {{.*#+}} ymm12 = ymm4[0,1,2,3,4,5,6],ymm12[7]
@@ -15248,7 +15248,7 @@ define void @load_i16_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX512DQ-FAST-NEXT: vpshufb %xmm2, %xmm4, %xmm4
; AVX512DQ-FAST-NEXT: vmovdqa64 %xmm2, %xmm29
; AVX512DQ-FAST-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
-; AVX512DQ-FAST-NEXT: vpbroadcastq {{.*#+}} ymm10 = [21474836482,21474836482,21474836482,21474836482]
+; AVX512DQ-FAST-NEXT: vpbroadcastq {{.*#+}} ymm10 = [2,5,2,5,2,5,2,5]
; AVX512DQ-FAST-NEXT: vpermd %ymm23, %ymm10, %ymm11
; AVX512DQ-FAST-NEXT: vpshufhw {{.*#+}} ymm11 = ymm11[0,1,2,3,4,5,4,7,8,9,10,11,12,13,12,15]
; AVX512DQ-FAST-NEXT: vpblendd {{.*#+}} ymm11 = ymm4[0,1,2,3,4,5,6],ymm11[7]
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
index 129f6712137bc..3e65c31cf83a1 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll
@@ -285,7 +285,7 @@ define void @load_i32_stride3_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-SLOW-NEXT: vmovaps (%rdi), %ymm0
; AVX2-SLOW-NEXT: vmovaps 32(%rdi), %ymm1
; AVX2-SLOW-NEXT: vmovaps 64(%rdi), %ymm2
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm3 = [2,5,2,5,2,5,2,5]
; AVX2-SLOW-NEXT: vpermps %ymm2, %ymm3, %ymm3
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm5 = <0,3,6,1,4,7,u,u>
@@ -315,7 +315,7 @@ define void @load_i32_stride3_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-NEXT: vmovaps (%rdi), %ymm0
; AVX2-FAST-NEXT: vmovaps 32(%rdi), %ymm1
; AVX2-FAST-NEXT: vmovaps 64(%rdi), %ymm2
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm3 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm3, %ymm3
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm5 = <0,3,6,1,4,7,u,u>
@@ -345,7 +345,7 @@ define void @load_i32_stride3_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-PERLANE-NEXT: vmovaps (%rdi), %ymm0
; AVX2-FAST-PERLANE-NEXT: vmovaps 32(%rdi), %ymm1
; AVX2-FAST-PERLANE-NEXT: vmovaps 64(%rdi), %ymm2
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm3 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm2, %ymm3, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm4 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm5 = <0,3,6,1,4,7,u,u>
@@ -557,7 +557,7 @@ define void @load_i32_stride3_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vmovaps 32(%rdi), %ymm3
; AVX2-SLOW-NEXT: vmovaps 64(%rdi), %ymm4
; AVX2-SLOW-NEXT: vmovaps 96(%rdi), %ymm5
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm6 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm6 = [2,5,2,5,2,5,2,5]
; AVX2-SLOW-NEXT: vpermps %ymm4, %ymm6, %ymm7
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm8 = ymm2[0],ymm3[1],ymm2[2,3],ymm3[4],ymm2[5,6],ymm3[7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm9 = <0,3,6,1,4,7,u,u>
@@ -606,7 +606,7 @@ define void @load_i32_stride3_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vmovaps 32(%rdi), %ymm3
; AVX2-FAST-NEXT: vmovaps 64(%rdi), %ymm4
; AVX2-FAST-NEXT: vmovaps 96(%rdi), %ymm5
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm6 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm6 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-NEXT: vpermps %ymm4, %ymm6, %ymm7
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm8 = ymm2[0],ymm3[1],ymm2[2,3],ymm3[4],ymm2[5,6],ymm3[7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm9 = <0,3,6,1,4,7,u,u>
@@ -654,7 +654,7 @@ define void @load_i32_stride3_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vmovaps 32(%rdi), %ymm3
; AVX2-FAST-PERLANE-NEXT: vmovaps 64(%rdi), %ymm4
; AVX2-FAST-PERLANE-NEXT: vmovaps 96(%rdi), %ymm5
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm6 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm6 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm4, %ymm6, %ymm7
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm8 = ymm2[0],ymm3[1],ymm2[2,3],ymm3[4],ymm2[5,6],ymm3[7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm9 = <0,3,6,1,4,7,u,u>
@@ -1105,7 +1105,7 @@ define void @load_i32_stride3_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vmovaps 64(%rdi), %ymm2
; AVX2-SLOW-NEXT: vmovaps 96(%rdi), %ymm15
; AVX2-SLOW-NEXT: vmovaps 160(%rdi), %ymm12
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm8 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm8 = [2,5,2,5,2,5,2,5]
; AVX2-SLOW-NEXT: vpermps %ymm12, %ymm8, %ymm0
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm9 = ymm15[0],ymm10[1],ymm15[2,3],ymm10[4],ymm15[5,6],ymm10[7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm11 = <0,3,6,1,4,7,u,u>
@@ -1209,7 +1209,7 @@ define void @load_i32_stride3_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vmovaps 64(%rdi), %ymm7
; AVX2-FAST-NEXT: vmovaps 96(%rdi), %ymm11
; AVX2-FAST-NEXT: vmovaps 160(%rdi), %ymm12
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm8 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm8 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-NEXT: vpermps %ymm12, %ymm8, %ymm0
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm10 = ymm11[0],ymm9[1],ymm11[2,3],ymm9[4],ymm11[5,6],ymm9[7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm13 = <0,3,6,1,4,7,u,u>
@@ -1310,7 +1310,7 @@ define void @load_i32_stride3_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vmovaps 64(%rdi), %ymm2
; AVX2-FAST-PERLANE-NEXT: vmovaps 96(%rdi), %ymm15
; AVX2-FAST-PERLANE-NEXT: vmovaps 160(%rdi), %ymm12
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm8 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm8 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm12, %ymm8, %ymm0
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm9 = ymm15[0],ymm10[1],ymm15[2,3],ymm10[4],ymm15[5,6],ymm10[7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm11 = <0,3,6,1,4,7,u,u>
@@ -2250,7 +2250,7 @@ define void @load_i32_stride3_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vmovaps 96(%rdi), %ymm12
; AVX2-SLOW-NEXT: vmovaps 160(%rdi), %ymm1
; AVX2-SLOW-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm0 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2,5,2,5,2,5,2,5]
; AVX2-SLOW-NEXT: vpermps %ymm1, %ymm0, %ymm1
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm2 = ymm12[0],ymm2[1],ymm12[2,3],ymm2[4],ymm12[5,6],ymm2[7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} ymm13 = <0,3,6,1,4,7,u,u>
@@ -2495,7 +2495,7 @@ define void @load_i32_stride3_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vmovaps 96(%rdi), %ymm14
; AVX2-FAST-NEXT: vmovaps 160(%rdi), %ymm1
; AVX2-FAST-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm0 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm0, %ymm1
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm14[0],ymm10[1],ymm14[2,3],ymm10[4],ymm14[5,6],ymm10[7]
; AVX2-FAST-NEXT: vmovups %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2732,7 +2732,7 @@ define void @load_i32_stride3_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vmovaps 96(%rdi), %ymm12
; AVX2-FAST-PERLANE-NEXT: vmovaps 160(%rdi), %ymm1
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm0 = [21474836482,21474836482,21474836482,21474836482]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2,5,2,5,2,5,2,5]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm1, %ymm0, %ymm1
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm2 = ymm12[0],ymm2[1],ymm12[2,3],ymm2[4],ymm12[5,6],ymm2[7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} ymm13 = <0,3,6,1,4,7,u,u>
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
index d7ba5150d0e44..8c7a91013144e 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll
@@ -332,7 +332,7 @@ define void @load_i32_stride4_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vmovaps 32(%rdi), %ymm4
; AVX2-ONLY-NEXT: vmovaps 64(%rdi), %ymm1
; AVX2-ONLY-NEXT: vmovaps 96(%rdi), %ymm2
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm3 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm3 = [0,4,0,4,0,4,0,4]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm3, %ymm5
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm3, %ymm3
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm5[6,7]
@@ -344,7 +344,7 @@ define void @load_i32_stride4_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm9 = xmm7[0],xmm8[0],xmm7[1],xmm8[1]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm6 = xmm9[0,1],xmm6[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm6[0,1,2,3],ymm3[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm6 = [21474836481,21474836481,21474836481,21474836481]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1,5,1,5,1,5,1,5]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm6, %ymm9
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm6, %ymm6
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm9[6,7]
@@ -354,7 +354,7 @@ define void @load_i32_stride4_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vpermps %ymm0, %ymm11, %ymm11
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm10 = xmm11[0,1],xmm10[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm10[0,1,2,3],ymm6[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [25769803778,25769803778,25769803778,25769803778]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [2,6,2,6,2,6,2,6]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm10, %ymm11
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm10, %ymm10
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm10 = ymm10[0,1,2,3,4,5],ymm11[6,7]
@@ -363,7 +363,7 @@ define void @load_i32_stride4_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm7 = xmm7[2],xmm8[2],xmm7[3],xmm8[3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm4 = xmm7[0,1],xmm4[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1,2,3],ymm10[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm7 = [30064771075,30064771075,30064771075,30064771075]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm7 = [3,7,3,7,3,7,3,7]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm7, %ymm2
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm7, %ymm1
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
@@ -666,7 +666,7 @@ define void @load_i32_stride4_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovups %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-ONLY-NEXT: vmovaps 192(%rdi), %ymm3
; AVX2-ONLY-NEXT: vmovaps 224(%rdi), %ymm2
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [0,4,0,4,0,4,0,4]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm0, %ymm1
; AVX2-ONLY-NEXT: vpermps %ymm3, %ymm0, %ymm6
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm6[0,1,2,3,4,5],ymm1[6,7]
@@ -688,7 +688,7 @@ define void @load_i32_stride4_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm6[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-ONLY-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [21474836481,21474836481,21474836481,21474836481]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,5,1,5,1,5,1,5]
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm0, %ymm1
; AVX2-ONLY-NEXT: vpermps %ymm4, %ymm0, %ymm6
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm1 = ymm6[0,1,2,3,4,5],ymm1[6,7]
@@ -711,7 +711,7 @@ define void @load_i32_stride4_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm7 = xmm7[0,1],xmm8[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm7 = ymm7[0,1,2,3],ymm9[4,5,6,7]
; AVX2-ONLY-NEXT: vmovups %ymm7, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm7 = [25769803778,25769803778,25769803778,25769803778]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm7 = [2,6,2,6,2,6,2,6]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm7, %ymm8
; AVX2-ONLY-NEXT: vpermps %ymm3, %ymm7, %ymm9
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm8 = ymm9[0,1,2,3,4,5],ymm8[6,7]
@@ -727,7 +727,7 @@ define void @load_i32_stride4_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm10 = xmm12[2],xmm13[2],xmm12[3],xmm13[3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm9 = xmm10[0,1],xmm9[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm7 = ymm9[0,1,2,3],ymm7[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm9 = [30064771075,30064771075,30064771075,30064771075]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm9 = [3,7,3,7,3,7,3,7]
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm9, %ymm5
; AVX2-ONLY-NEXT: vpermps %ymm4, %ymm9, %ymm4
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm5[6,7]
@@ -1367,7 +1367,7 @@ define void @load_i32_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovups %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-ONLY-NEXT: vmovaps 320(%rdi), %ymm3
; AVX2-ONLY-NEXT: vmovaps 352(%rdi), %ymm2
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [0,4,0,4,0,4,0,4]
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm0, %ymm1
; AVX2-ONLY-NEXT: vmovaps %ymm2, %ymm10
; AVX2-ONLY-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1428,7 +1428,7 @@ define void @load_i32_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-ONLY-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm3 = [21474836481,21474836481,21474836481,21474836481]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1,5,1,5,1,5,1,5]
; AVX2-ONLY-NEXT: vpermps %ymm10, %ymm3, %ymm0
; AVX2-ONLY-NEXT: vpermps %ymm9, %ymm3, %ymm1
; AVX2-ONLY-NEXT: vmovaps %ymm9, %ymm4
@@ -1486,7 +1486,7 @@ define void @load_i32_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm11 = xmm13[0,1],xmm11[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5,6,7]
; AVX2-ONLY-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [25769803778,25769803778,25769803778,25769803778]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2,6,2,6,2,6,2,6]
; AVX2-ONLY-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Reload
; AVX2-ONLY-NEXT: vpermps %ymm6, %ymm0, %ymm11
; AVX2-ONLY-NEXT: vpermps %ymm4, %ymm0, %ymm13
@@ -1527,7 +1527,7 @@ define void @load_i32_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: # xmm10 = xmm2[2],mem[2],xmm2[3],mem[3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm10[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm13 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [30064771075,30064771075,30064771075,30064771075]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [3,7,3,7,3,7,3,7]
; AVX2-ONLY-NEXT: vpermps %ymm15, %ymm10, %ymm0
; AVX2-ONLY-NEXT: vpermps %ymm14, %ymm10, %ymm1
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
@@ -2843,7 +2843,7 @@ define void @load_i32_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovups %ymm9, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-ONLY-NEXT: vmovaps 192(%rdi), %ymm2
; AVX2-ONLY-NEXT: vmovaps 224(%rdi), %ymm1
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm6 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm6 = [0,4,0,4,0,4,0,4]
; AVX2-ONLY-NEXT: vpermps %ymm1, %ymm6, %ymm0
; AVX2-ONLY-NEXT: vmovaps %ymm1, %ymm5
; AVX2-ONLY-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2986,7 +2986,7 @@ define void @load_i32_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm6[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-ONLY-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm6 = [21474836481,21474836481,21474836481,21474836481]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1,5,1,5,1,5,1,5]
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm6, %ymm0
; AVX2-ONLY-NEXT: vmovaps %ymm10, %ymm2
; AVX2-ONLY-NEXT: vmovups %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3110,7 +3110,7 @@ define void @load_i32_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],xmm10[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-ONLY-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [25769803778,25769803778,25769803778,25769803778]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2,6,2,6,2,6,2,6]
; AVX2-ONLY-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm9 # 32-byte Reload
; AVX2-ONLY-NEXT: vpermps %ymm9, %ymm0, %ymm1
; AVX2-ONLY-NEXT: vpermps %ymm2, %ymm0, %ymm10
@@ -3199,7 +3199,7 @@ define void @load_i32_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-ONLY-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [30064771075,30064771075,30064771075,30064771075]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [3,7,3,7,3,7,3,7]
; AVX2-ONLY-NEXT: vpermps %ymm9, %ymm0, %ymm1
; AVX2-ONLY-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm2 # 32-byte Folded Reload
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm1[6,7]
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll
index 6016de95d255d..556e2389b985f 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll
@@ -104,9 +104,9 @@ define void @load_i32_stride5_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-FAST-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512F-FAST-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm4 = [25769803777,25769803777]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm4 = [1,6,1,6]
; AVX512F-FAST-NEXT: vpermi2d %xmm1, %xmm0, %xmm4
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm5 = [30064771074,30064771074]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm5 = [2,7,2,7]
; AVX512F-FAST-NEXT: vpermi2d %xmm1, %xmm0, %xmm5
; AVX512F-FAST-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX512F-FAST-NEXT: vpbroadcastd 16(%rdi), %ymm1
@@ -145,9 +145,9 @@ define void @load_i32_stride5_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-FAST-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX512BW-FAST-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX512BW-FAST-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm4 = [25769803777,25769803777]
+; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm4 = [1,6,1,6]
; AVX512BW-FAST-NEXT: vpermi2d %xmm1, %xmm0, %xmm4
-; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm5 = [30064771074,30064771074]
+; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm5 = [2,7,2,7]
; AVX512BW-FAST-NEXT: vpermi2d %xmm1, %xmm0, %xmm5
; AVX512BW-FAST-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX512BW-FAST-NEXT: vpbroadcastd 16(%rdi), %ymm1
@@ -505,10 +505,10 @@ define void @load_i32_stride5_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm8 = ymm3[0,1,2,3,4,5,6],ymm8[7]
; AVX2-ONLY-NEXT: vpshufd {{.*#+}} ymm8 = ymm8[2,3,0,1,6,7,4,5]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1,2],ymm8[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm8 = [21474836480,21474836480,21474836480,21474836480]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm8 = [0,5,0,5,0,5,0,5]
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm8, %ymm8
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1,2,3,4,5],ymm8[6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm8 = [25769803777,25769803777,25769803777,25769803777]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm8 = [1,6,1,6,1,6,1,6]
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm8, %ymm8
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm4[4,5],ymm3[6,7]
; AVX2-ONLY-NEXT: vpshufd {{.*#+}} ymm9 = ymm9[3,0,2,2,7,4,6,6]
@@ -522,7 +522,7 @@ define void @load_i32_stride5_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5],ymm2[6,7]
; AVX2-ONLY-NEXT: vpermd %ymm1, %ymm4, %ymm1
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm3[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm2 = [30064771074,30064771074,30064771074,30064771074]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2,7,2,7,2,7,2,7]
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; AVX2-ONLY-NEXT: vmovdqa %ymm5, (%rsi)
@@ -1015,7 +1015,7 @@ define void @load_i32_stride5_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm15 = ymm8[0,1,2,3,4,5,6],ymm15[7]
; AVX2-ONLY-NEXT: vpshufd {{.*#+}} ymm15 = ymm15[2,3,0,1,6,7,4,5]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2],ymm15[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm15 = [21474836480,21474836480,21474836480,21474836480]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm15 = [0,5,0,5,0,5,0,5]
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm15, %ymm7
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3,4,5],ymm7[6,7]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm7 = ymm6[0,1,2,3],ymm4[4,5],ymm6[6,7]
@@ -1032,7 +1032,7 @@ define void @load_i32_stride5_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vpalignr {{.*#+}} ymm10 = ymm3[12,13,14,15],ymm5[0,1,2,3,4,5,6,7,8,9,10,11],ymm3[28,29,30,31],ymm5[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2-ONLY-NEXT: vpermq {{.*#+}} ymm10 = ymm10[0,3,2,3]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm10 = ymm10[0,1,2],ymm15[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm15 = [25769803777,25769803777,25769803777,25769803777]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm15 = [1,6,1,6,1,6,1,6]
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm15, %ymm11
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm10 = ymm10[0,1,2,3,4,5],ymm11[6,7]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm11 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
@@ -1048,7 +1048,7 @@ define void @load_i32_stride5_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} xmm5 = <4,1,6,u>
; AVX2-ONLY-NEXT: vpermd %ymm3, %ymm5, %ymm3
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1,2],ymm8[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm8 = [30064771074,30064771074,30064771074,30064771074]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm8 = [2,7,2,7,2,7,2,7]
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm8, %ymm0
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5],ymm0[6,7]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm6[4,5],ymm4[6,7]
@@ -2161,7 +2161,7 @@ define void @load_i32_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3,4,5,6,7]
; AVX2-ONLY-NEXT: vmovdqa 288(%rdi), %ymm2
; AVX2-ONLY-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm0 = [21474836480,21474836480,21474836480,21474836480]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,5,0,5,0,5,0,5]
; AVX2-ONLY-NEXT: vpermd %ymm2, %ymm0, %ymm2
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7]
; AVX2-ONLY-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -2211,7 +2211,7 @@ define void @load_i32_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: # ymm8 = mem[12,13,14,15],ymm12[0,1,2,3,4,5,6,7,8,9,10,11],mem[28,29,30,31],ymm12[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2-ONLY-NEXT: vpermq {{.*#+}} ymm8 = ymm8[0,3,2,3]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm8[0,1,2],ymm0[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm8 = [25769803777,25769803777,25769803777,25769803777]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm8 = [1,6,1,6,1,6,1,6]
; AVX2-ONLY-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
; AVX2-ONLY-NEXT: vpermd %ymm11, %ymm8, %ymm10
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm10[6,7]
@@ -2256,7 +2256,7 @@ define void @load_i32_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} xmm8 = <4,1,6,u>
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm8, %ymm0
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2],ymm7[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm7 = [30064771074,30064771074,30064771074,30064771074]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm7 = [2,7,2,7,2,7,2,7]
; AVX2-ONLY-NEXT: vpermd %ymm11, %ymm7, %ymm3
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm3[6,7]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm6[4,5],ymm4[6,7]
@@ -4520,7 +4520,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3,4,5,6,7]
; AVX2-ONLY-NEXT: vmovdqa 288(%rdi), %ymm3
; AVX2-ONLY-NEXT: vmovdqu %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm0 = [21474836480,21474836480,21474836480,21474836480]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,5,0,5,0,5,0,5]
; AVX2-ONLY-NEXT: vpermd %ymm3, %ymm0, %ymm3
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm3[6,7]
; AVX2-ONLY-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -4623,7 +4623,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vpalignr {{.*#+}} ymm13 = ymm12[12,13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11],ymm12[28,29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27]
; AVX2-ONLY-NEXT: vpermq {{.*#+}} ymm13 = ymm13[0,3,2,3]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2],ymm0[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm0 = [25769803777,25769803777,25769803777,25769803777]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm0 = [1,6,1,6,1,6,1,6]
; AVX2-ONLY-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm15 # 32-byte Folded Reload
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm2 = ymm13[0,1,2,3,4,5],ymm15[6,7]
; AVX2-ONLY-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -4712,7 +4712,7 @@ define void @load_i32_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} xmm7 = <4,1,6,u>
; AVX2-ONLY-NEXT: vpermd %ymm4, %ymm7, %ymm4
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2],ymm0[3,4,5,6,7]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm6 = [30064771074,30064771074,30064771074,30064771074]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm6 = [2,7,2,7,2,7,2,7]
; AVX2-ONLY-NEXT: vpermd %ymm1, %ymm6, %ymm1
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-ONLY-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
index f20ae14fc1016..872fd8698cca1 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
@@ -132,14 +132,14 @@ define void @load_i32_stride6_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-FAST-LABEL: load_i32_stride6_vf2:
; AVX512F-FAST: # %bb.0:
; AVX512F-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm0 = [25769803776,25769803776]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm0 = [0,6,0,6]
; AVX512F-FAST-NEXT: vmovdqa (%rdi), %xmm1
; AVX512F-FAST-NEXT: vmovdqa 16(%rdi), %xmm2
; AVX512F-FAST-NEXT: vmovdqa 32(%rdi), %xmm3
; AVX512F-FAST-NEXT: vpermi2d %xmm2, %xmm1, %xmm0
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm4 = [30064771073,30064771073]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm4 = [1,7,1,7]
; AVX512F-FAST-NEXT: vpermi2d %xmm2, %xmm1, %xmm4
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [17179869186,17179869186]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [2,4,2,4]
; AVX512F-FAST-NEXT: vpermi2d %xmm3, %xmm1, %xmm2
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = <3,5,u,u>
; AVX512F-FAST-NEXT: vpermi2d %xmm3, %xmm1, %xmm5
@@ -190,14 +190,14 @@ define void @load_i32_stride6_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-FAST-LABEL: load_i32_stride6_vf2:
; AVX512BW-FAST: # %bb.0:
; AVX512BW-FAST-NEXT: movq {{[0-9]+}}(%rsp), %rax
-; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm0 = [25769803776,25769803776]
+; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm0 = [0,6,0,6]
; AVX512BW-FAST-NEXT: vmovdqa (%rdi), %xmm1
; AVX512BW-FAST-NEXT: vmovdqa 16(%rdi), %xmm2
; AVX512BW-FAST-NEXT: vmovdqa 32(%rdi), %xmm3
; AVX512BW-FAST-NEXT: vpermi2d %xmm2, %xmm1, %xmm0
-; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm4 = [30064771073,30064771073]
+; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm4 = [1,7,1,7]
; AVX512BW-FAST-NEXT: vpermi2d %xmm2, %xmm1, %xmm4
-; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [17179869186,17179869186]
+; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm2 = [2,4,2,4]
; AVX512BW-FAST-NEXT: vpermi2d %xmm3, %xmm1, %xmm2
; AVX512BW-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = <3,5,u,u>
; AVX512BW-FAST-NEXT: vpermi2d %xmm3, %xmm1, %xmm5
@@ -631,14 +631,14 @@ define void @load_i32_stride6_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm9 = ymm8[0,2,2,2,4,6,6,6]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2],ymm9[3,4,5,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm9 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm10 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm10 = [4,2,4,2,4,2,4,2]
; AVX2-SLOW-NEXT: vpermps %ymm9, %ymm10, %ymm10
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm10[6,7]
; AVX2-SLOW-NEXT: vmovaps {{.*#+}} xmm10 = <1,7,5,u>
; AVX2-SLOW-NEXT: vpermps %ymm6, %ymm10, %ymm6
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm8 = ymm8[1,3,2,3,5,7,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2],ymm8[3,4,5,6,7]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm8 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm8 = [5,3,5,3,5,3,5,3]
; AVX2-SLOW-NEXT: vpermps %ymm9, %ymm8, %ymm8
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm8[6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm8 = ymm7[0,1],ymm5[2,3],ymm7[4,5],ymm5[6,7]
@@ -716,14 +716,14 @@ define void @load_i32_stride6_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-NEXT: vshufps {{.*#+}} ymm9 = ymm8[0,2,2,2,4,6,6,6]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2],ymm9[3,4,5,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm9 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm10 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm10 = [4,2,4,2,4,2,4,2]
; AVX2-FAST-NEXT: vpermps %ymm9, %ymm10, %ymm10
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm10[6,7]
; AVX2-FAST-NEXT: vmovaps {{.*#+}} xmm10 = <1,7,5,u>
; AVX2-FAST-NEXT: vpermps %ymm6, %ymm10, %ymm6
; AVX2-FAST-NEXT: vshufps {{.*#+}} ymm8 = ymm8[1,3,2,3,5,7,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2],ymm8[3,4,5,6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm8 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm8 = [5,3,5,3,5,3,5,3]
; AVX2-FAST-NEXT: vpermps %ymm9, %ymm8, %ymm8
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm8[6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm8 = ymm7[0,1],ymm5[2,3],ymm7[4,5],ymm5[6,7]
@@ -801,14 +801,14 @@ define void @load_i32_stride6_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm9 = ymm8[0,2,2,2,4,6,6,6]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2],ymm9[3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm9 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm10 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm10 = [4,2,4,2,4,2,4,2]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm9, %ymm10, %ymm10
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm10[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovaps {{.*#+}} xmm10 = <1,7,5,u>
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm6, %ymm10, %ymm6
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm8 = ymm8[1,3,2,3,5,7,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2],ymm8[3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm8 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm8 = [5,3,5,3,5,3,5,3]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm9, %ymm8, %ymm8
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm8[6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm8 = ymm7[0,1],ymm5[2,3],ymm7[4,5],ymm5[6,7]
@@ -1434,7 +1434,7 @@ define void @load_i32_stride6_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm13 = ymm12[0,2,2,2,4,6,6,6]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2],ymm13[3,4,5,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm13 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm0 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm0 = [4,2,4,2,4,2,4,2]
; AVX2-SLOW-NEXT: vpermps %ymm13, %ymm0, %ymm14
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm14[6,7]
; AVX2-SLOW-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1455,7 +1455,7 @@ define void @load_i32_stride6_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vpermps %ymm4, %ymm0, %ymm3
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm4 = ymm12[1,3,2,3,5,7,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3,4,5,6,7]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm4 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm4 = [5,3,5,3,5,3,5,3]
; AVX2-SLOW-NEXT: vpermps %ymm13, %ymm4, %ymm12
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm12[6,7]
; AVX2-SLOW-NEXT: vmovups %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1610,7 +1610,7 @@ define void @load_i32_stride6_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vshufps {{.*#+}} ymm13 = ymm12[0,2,2,2,4,6,6,6]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2],ymm13[3,4,5,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm13 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm0 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm0 = [4,2,4,2,4,2,4,2]
; AVX2-FAST-NEXT: vpermps %ymm13, %ymm0, %ymm14
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm14[6,7]
; AVX2-FAST-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1631,7 +1631,7 @@ define void @load_i32_stride6_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vpermps %ymm4, %ymm0, %ymm3
; AVX2-FAST-NEXT: vshufps {{.*#+}} ymm4 = ymm12[1,3,2,3,5,7,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3,4,5,6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm4 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm4 = [5,3,5,3,5,3,5,3]
; AVX2-FAST-NEXT: vpermps %ymm13, %ymm4, %ymm12
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm12[6,7]
; AVX2-FAST-NEXT: vmovups %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1785,7 +1785,7 @@ define void @load_i32_stride6_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm13 = ymm12[0,2,2,2,4,6,6,6]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2],ymm13[3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm13 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm0 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm0 = [4,2,4,2,4,2,4,2]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm13, %ymm0, %ymm14
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm14[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -1806,7 +1806,7 @@ define void @load_i32_stride6_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm4, %ymm0, %ymm3
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm4 = ymm12[1,3,2,3,5,7,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm4 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm4 = [5,3,5,3,5,3,5,3]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm13, %ymm4, %ymm12
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm12[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3208,7 +3208,7 @@ define void @load_i32_stride6_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm3 = ymm11[0,2,2,2,4,6,6,6]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm3 = ymm2[0,1,2],ymm3[3,4,5,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm10 = ymm4[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm2 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4,2,4,2,4,2,4,2]
; AVX2-SLOW-NEXT: vpermps %ymm10, %ymm2, %ymm5
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5],ymm5[6,7]
; AVX2-SLOW-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3277,7 +3277,7 @@ define void @load_i32_stride6_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 32-byte Folded Reload
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm11 = ymm11[1,3,2,3,5,7,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2],ymm11[3,4,5,6,7]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm11 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm11 = [5,3,5,3,5,3,5,3]
; AVX2-SLOW-NEXT: vpermps %ymm10, %ymm11, %ymm10
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm10[6,7]
; AVX2-SLOW-NEXT: vmovups %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3618,7 +3618,7 @@ define void @load_i32_stride6_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vshufps {{.*#+}} ymm3 = ymm11[0,2,2,2,4,6,6,6]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm2[0,1,2],ymm3[3,4,5,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm10 = ymm4[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm2 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4,2,4,2,4,2,4,2]
; AVX2-FAST-NEXT: vpermps %ymm10, %ymm2, %ymm5
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5],ymm5[6,7]
; AVX2-FAST-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -3687,7 +3687,7 @@ define void @load_i32_stride6_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 32-byte Folded Reload
; AVX2-FAST-NEXT: vshufps {{.*#+}} ymm11 = ymm11[1,3,2,3,5,7,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2],ymm11[3,4,5,6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm11 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm11 = [5,3,5,3,5,3,5,3]
; AVX2-FAST-NEXT: vpermps %ymm10, %ymm11, %ymm10
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm10[6,7]
; AVX2-FAST-NEXT: vmovups %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -4023,7 +4023,7 @@ define void @load_i32_stride6_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm3 = ymm11[0,2,2,2,4,6,6,6]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm3 = ymm2[0,1,2],ymm3[3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm10 = ymm4[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm2 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4,2,4,2,4,2,4,2]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm10, %ymm2, %ymm5
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5],ymm5[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -4092,7 +4092,7 @@ define void @load_i32_stride6_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 32-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm11 = ymm11[1,3,2,3,5,7,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2],ymm11[3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm11 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm11 = [5,3,5,3,5,3,5,3]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm10, %ymm11, %ymm10
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm10[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -6980,7 +6980,7 @@ define void @load_i32_stride6_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm1 = ymm7[0,1,2,3],ymm6[4,5,6,7]
; AVX2-SLOW-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm9 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm9 = [4,2,4,2,4,2,4,2]
; AVX2-SLOW-NEXT: vpermps %ymm1, %ymm9, %ymm1
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-SLOW-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -7147,7 +7147,7 @@ define void @load_i32_stride6_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vpermilps $237, {{[-0-9]+}}(%r{{[sb]}}p), %ymm14 # 32-byte Folded Reload
; AVX2-SLOW-NEXT: # ymm14 = mem[1,3,2,3,5,7,6,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm14 = ymm0[0,1,2],ymm14[3,4,5,6,7]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm0 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm0 = [5,3,5,3,5,3,5,3]
; AVX2-SLOW-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm15 # 32-byte Folded Reload
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm14 = ymm14[0,1,2,3,4,5],ymm15[6,7]
; AVX2-SLOW-NEXT: vmovups %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -7849,7 +7849,7 @@ define void @load_i32_stride6_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm7[0,1,2,3],ymm6[4,5,6,7]
; AVX2-FAST-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm10 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm10 = [4,2,4,2,4,2,4,2]
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm10, %ymm1
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-FAST-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -8016,7 +8016,7 @@ define void @load_i32_stride6_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vpermilps $237, {{[-0-9]+}}(%r{{[sb]}}p), %ymm14 # 32-byte Folded Reload
; AVX2-FAST-NEXT: # ymm14 = mem[1,3,2,3,5,7,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm14 = ymm0[0,1,2],ymm14[3,4,5,6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm0 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm0 = [5,3,5,3,5,3,5,3]
; AVX2-FAST-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm15 # 32-byte Folded Reload
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm14 = ymm14[0,1,2,3,4,5],ymm15[6,7]
; AVX2-FAST-NEXT: vmovups %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -8711,7 +8711,7 @@ define void @load_i32_stride6_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm1 = ymm7[0,1,2,3],ymm6[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm9 = [8589934596,8589934596,8589934596,8589934596]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm9 = [4,2,4,2,4,2,4,2]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm1, %ymm9, %ymm1
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
@@ -8878,7 +8878,7 @@ define void @load_i32_stride6_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vpermilps $237, {{[-0-9]+}}(%r{{[sb]}}p), %ymm14 # 32-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: # ymm14 = mem[1,3,2,3,5,7,6,7]
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm14 = ymm0[0,1,2],ymm14[3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm0 = [12884901893,12884901893,12884901893,12884901893]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm0 = [5,3,5,3,5,3,5,3]
; AVX2-FAST-PERLANE-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm15 # 32-byte Folded Reload
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm14 = ymm14[0,1,2,3,4,5],ymm15[6,7]
; AVX2-FAST-PERLANE-NEXT: vmovups %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
index 3ca2103fdc52d..011485f16168e 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll
@@ -159,7 +159,7 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-FAST-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-FAST-NEXT: vmovdqa 32(%rdi), %xmm1
; AVX512F-FAST-NEXT: vpinsrd $1, 28(%rdi), %xmm0, %xmm2
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm3 = [17179869185,17179869185]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,4,1,4]
; AVX512F-FAST-NEXT: vpermi2d %xmm1, %xmm0, %xmm3
; AVX512F-FAST-NEXT: vpbroadcastd 8(%rdi), %xmm4
; AVX512F-FAST-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0],xmm1[1],xmm4[2,3]
@@ -227,7 +227,7 @@ define void @load_i32_stride7_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-FAST-NEXT: vmovdqa (%rdi), %xmm0
; AVX512BW-FAST-NEXT: vmovdqa 32(%rdi), %xmm1
; AVX512BW-FAST-NEXT: vpinsrd $1, 28(%rdi), %xmm0, %xmm2
-; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm3 = [17179869185,17179869185]
+; AVX512BW-FAST-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,4,1,4]
; AVX512BW-FAST-NEXT: vpermi2d %xmm1, %xmm0, %xmm3
; AVX512BW-FAST-NEXT: vpbroadcastd 8(%rdi), %xmm4
; AVX512BW-FAST-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0],xmm1[1],xmm4[2,3]
@@ -910,7 +910,7 @@ define void @load_i32_stride7_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm13 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-SLOW-NEXT: vpermd %ymm13, %ymm12, %ymm12
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm10 = xmm12[0,1],xmm10[2,3]
-; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm12 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm12 = [0,7,0,7,0,7,0,7]
; AVX2-SLOW-NEXT: vpermd %ymm5, %ymm12, %ymm13
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3,4,5],ymm4[6,7]
; AVX2-SLOW-NEXT: vpbroadcastd 212(%rdi), %ymm14
@@ -979,7 +979,7 @@ define void @load_i32_stride7_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm7 = ymm7[0,1,2,0]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm6 = ymm7[0,1,2,3,4,5,6],ymm6[7]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm7 = ymm10[0,1],ymm9[2,3],ymm10[4,5],ymm9[6,7]
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm8 = [25769803781,25769803781,25769803781,25769803781]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm8 = [5,6,5,6,5,6,5,6]
; AVX2-FAST-NEXT: vpermd %ymm7, %ymm8, %ymm7
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm11 = <1,0,7,u,u,u,u,u>
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm8 = ymm1[0],ymm0[1],ymm1[2,3,4],ymm0[5],ymm1[6,7]
@@ -1013,7 +1013,7 @@ define void @load_i32_stride7_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm13 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-FAST-NEXT: vpermd %ymm13, %ymm12, %ymm12
; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm10 = xmm12[0,1],xmm10[2,3]
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm12 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm12 = [0,7,0,7,0,7,0,7]
; AVX2-FAST-NEXT: vpermd %ymm5, %ymm12, %ymm13
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3,4,5],ymm4[6,7]
; AVX2-FAST-NEXT: vpbroadcastd 212(%rdi), %ymm14
@@ -1116,7 +1116,7 @@ define void @load_i32_stride7_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm13 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpermd %ymm13, %ymm12, %ymm12
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm10 = xmm12[0,1],xmm10[2,3]
-; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm12 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm12 = [0,7,0,7,0,7,0,7]
; AVX2-FAST-PERLANE-NEXT: vpermd %ymm5, %ymm12, %ymm13
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm13 = ymm13[0,1,2,3,4,5],ymm4[6,7]
; AVX2-FAST-PERLANE-NEXT: vpbroadcastd 212(%rdi), %ymm14
@@ -1923,7 +1923,7 @@ define void @load_i32_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: vmovdqa %ymm7, %ymm12
; AVX2-SLOW-NEXT: vpermd %ymm10, %ymm5, %ymm10
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm10 = xmm10[0,1],xmm0[2,3]
-; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,7,0,7,0,7,0,7]
; AVX2-SLOW-NEXT: vpermd %ymm1, %ymm0, %ymm11
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm11 = ymm11[0,1,2,3,4,5],ymm2[6,7]
; AVX2-SLOW-NEXT: vpbroadcastd 212(%rdi), %ymm13
@@ -2073,7 +2073,7 @@ define void @load_i32_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm7 = ymm7[0,1,2,0]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm7[0,1,2,3,4,5,6],ymm2[7]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm7 = ymm3[0,1],ymm11[2,3],ymm3[4,5],ymm11[6,7]
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm0 = [25769803781,25769803781,25769803781,25769803781]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm0 = [5,6,5,6,5,6,5,6]
; AVX2-FAST-NEXT: vpermd %ymm7, %ymm0, %ymm8
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm7 = [1,0,7,7,5,4,7,7]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm6[0],ymm12[1],ymm6[2,3,4],ymm12[5],ymm6[6,7]
@@ -2149,7 +2149,7 @@ define void @load_i32_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vmovdqa %ymm6, %ymm8
; AVX2-FAST-NEXT: vpermd %ymm4, %ymm3, %ymm4
; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0,1],xmm0[2,3]
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm0 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,7,0,7,0,7,0,7]
; AVX2-FAST-NEXT: vpermd %ymm10, %ymm0, %ymm5
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm9[6,7]
; AVX2-FAST-NEXT: vpbroadcastd 212(%rdi), %ymm7
@@ -2376,7 +2376,7 @@ define void @load_i32_stride7_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm7, %ymm12
; AVX2-FAST-PERLANE-NEXT: vpermd %ymm10, %ymm5, %ymm10
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm10 = xmm10[0,1],xmm0[2,3]
-; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm0 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,7,0,7,0,7,0,7]
; AVX2-FAST-PERLANE-NEXT: vpermd %ymm1, %ymm0, %ymm11
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm11 = ymm11[0,1,2,3,4,5],ymm2[6,7]
; AVX2-FAST-PERLANE-NEXT: vpbroadcastd 212(%rdi), %ymm13
@@ -4304,7 +4304,7 @@ define void @load_i32_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: # ymm2 = mem[0,1,2,3],ymm2[4,5,6,7]
; AVX2-SLOW-NEXT: vpermps %ymm2, %ymm3, %ymm2
; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm4 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm4 = [0,7,0,7,0,7,0,7]
; AVX2-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Reload
; AVX2-SLOW-NEXT: vpermps %ymm6, %ymm4, %ymm2
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm12[6,7]
@@ -4625,7 +4625,7 @@ define void @load_i32_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3,4,5,6],ymm0[7]
; AVX2-FAST-NEXT: vmovdqa 64(%rdi), %ymm6
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm10[0,1],ymm6[2,3],ymm10[4,5],ymm6[6,7]
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm0 = [25769803781,25769803781,25769803781,25769803781]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm0 = [5,6,5,6,5,6,5,6]
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm0, %ymm3
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [1,0,7,7,5,4,7,7]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm4 = ymm11[0],ymm12[1],ymm11[2,3,4],ymm12[5],ymm11[6,7]
@@ -4815,7 +4815,7 @@ define void @load_i32_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: # ymm3 = mem[0,1,2,3],ymm3[4,5,6,7]
; AVX2-FAST-NEXT: vpermd %ymm3, %ymm0, %ymm3
; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm1 = xmm3[0,1],xmm1[2,3]
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm11 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm11 = [0,7,0,7,0,7,0,7]
; AVX2-FAST-NEXT: vpermd %ymm7, %ymm11, %ymm3
; AVX2-FAST-NEXT: vmovdqa %ymm7, %ymm12
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm14[6,7]
@@ -5327,7 +5327,7 @@ define void @load_i32_stride7_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: # ymm2 = mem[0,1,2,3],ymm2[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm2, %ymm3, %ymm2
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm4 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm4 = [0,7,0,7,0,7,0,7]
; AVX2-FAST-PERLANE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Reload
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm6, %ymm4, %ymm2
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm12[6,7]
@@ -9208,7 +9208,7 @@ define void @load_i32_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-SLOW-NEXT: # ymm2 = mem[0,1,2,3],ymm2[4,5,6,7]
; AVX2-SLOW-NEXT: vpermps %ymm2, %ymm5, %ymm2
; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm9 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm9 = [0,7,0,7,0,7,0,7]
; AVX2-SLOW-NEXT: vpermps %ymm3, %ymm9, %ymm2
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm4[6,7]
; AVX2-SLOW-NEXT: vbroadcastss 212(%rdi), %ymm3
@@ -9846,7 +9846,7 @@ define void @load_i32_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; AVX2-FAST-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm8 # 32-byte Reload
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm8[0,1],ymm0[2,3],ymm8[4,5],ymm0[6,7]
-; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [25769803781,25769803781,25769803781,25769803781]
+; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [5,6,5,6,5,6,5,6]
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm3
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [1,0,7,7,5,4,7,7]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm12[0],ymm13[1],ymm12[2,3,4],ymm13[5],ymm12[6,7]
@@ -10250,7 +10250,7 @@ define void @load_i32_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-NEXT: # ymm2 = ymm2[0,1,2,3],mem[4,5,6,7]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm5, %ymm2
; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm10 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm10 = [0,7,0,7,0,7,0,7]
; AVX2-FAST-NEXT: vpermps %ymm3, %ymm10, %ymm2
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm4[6,7]
; AVX2-FAST-NEXT: vbroadcastss 212(%rdi), %ymm3
@@ -11299,7 +11299,7 @@ define void @load_i32_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
; AVX2-FAST-PERLANE-NEXT: # ymm2 = mem[0,1,2,3],ymm2[4,5,6,7]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm2, %ymm5, %ymm2
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm9 = [30064771072,30064771072,30064771072,30064771072]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm9 = [0,7,0,7,0,7,0,7]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm3, %ymm9, %ymm2
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm4[6,7]
; AVX2-FAST-PERLANE-NEXT: vbroadcastss 212(%rdi), %ymm3
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
index 042c6d2cc02ba..7e9cce1a7e8db 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll
@@ -855,7 +855,7 @@ define void @load_i8_stride4_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} xmm7 = xmm7[0,1],xmm6[2,3]
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm8 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-ONLY-NEXT: vpshufb %ymm8, %ymm1, %ymm9
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm6 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm6 = [0,4,0,4,0,4,0,4]
; AVX2-ONLY-NEXT: vpermd %ymm9, %ymm6, %ymm9
; AVX2-ONLY-NEXT: vpshufb %ymm8, %ymm0, %ymm8
; AVX2-ONLY-NEXT: vpermd %ymm8, %ymm6, %ymm8
@@ -1758,7 +1758,7 @@ define void @load_i8_stride4_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vpshufb %ymm13, %ymm1, %ymm9
; AVX2-ONLY-NEXT: vmovdqa %ymm1, %ymm4
; AVX2-ONLY-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm2 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,4,0,4,0,4,0,4]
; AVX2-ONLY-NEXT: vpermd %ymm9, %ymm2, %ymm9
; AVX2-ONLY-NEXT: vpshufb %ymm13, %ymm0, %ymm11
; AVX2-ONLY-NEXT: vpermd %ymm11, %ymm2, %ymm11
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll
index ab9da42de3ca3..55d943b52659d 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll
@@ -1741,7 +1741,7 @@ define void @load_i8_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,0,5,10,15,20,25,30,19,24,29,18,23,28,u,u,u,u,u,u,u]
; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2],ymm1[3,4,5,6,7]
; AVX2-ONLY-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,1,6,11,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,21,26,31,u,u,u,u,u,u,u,u]
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm2 = [21474836480,21474836480,21474836480,21474836480]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,5,0,5,0,5,0,5]
; AVX2-ONLY-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-ONLY-NEXT: vpblendvb %ymm8, %ymm1, %ymm0, %ymm0
; AVX2-ONLY-NEXT: vmovdqa %ymm6, (%rsi)
@@ -1847,7 +1847,7 @@ define void @load_i8_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2],ymm0[3,4,5,6,7]
; AVX512F-NEXT: vmovdqa 128(%rdi), %ymm1
; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,1,6,11,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,21,26,31,u,u,u,u,u,u,u,u]
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [21474836480,21474836480,21474836480,21474836480]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,5,0,5,0,5,0,5]
; AVX512F-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX512F-NEXT: vpternlogq $184, %ymm0, %ymm8, %ymm1
; AVX512F-NEXT: vmovdqa %ymm6, (%rsi)
@@ -1959,7 +1959,7 @@ define void @load_i8_stride5_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3,4,5,6,7]
; AVX512BW-NEXT: vmovdqa 128(%rdi), %ymm1
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,1,6,11,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,21,26,31,u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [21474836480,21474836480,21474836480,21474836480]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,5,0,5,0,5,0,5]
; AVX512BW-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX512BW-NEXT: vmovdqu8 %ymm1, %ymm0 {%k3}
; AVX512BW-NEXT: vmovdqa %ymm4, (%rsi)
@@ -3724,7 +3724,7 @@ define void @load_i8_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX2-ONLY-NEXT: vmovdqa 288(%rdi), %ymm10
; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm2 = <u,1,6,11,0,5,10,15,u,u,u,u,u,u,u,u,u,1,6,11,0,5,10,15,u,u,u,u,u,u,u,u>
; AVX2-ONLY-NEXT: vpshufb %ymm2, %ymm10, %ymm10
-; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm0 = [21474836480,21474836480,21474836480,21474836480]
+; AVX2-ONLY-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,5,0,5,0,5,0,5]
; AVX2-ONLY-NEXT: vpermd %ymm10, %ymm0, %ymm10
; AVX2-ONLY-NEXT: vpblendvb %ymm12, %ymm6, %ymm10, %ymm6
; AVX2-ONLY-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Reload
@@ -3960,7 +3960,7 @@ define void @load_i8_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3,4,5,6,7]
; AVX512F-NEXT: vmovdqa 128(%rdi), %ymm2
; AVX512F-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,1,6,11,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,21,26,31,u,u,u,u,u,u,u,u]
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm5 = [21474836480,21474836480,21474836480,21474836480]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm5 = [0,5,0,5,0,5,0,5]
; AVX512F-NEXT: vpermd %ymm2, %ymm5, %ymm2
; AVX512F-NEXT: vpternlogq $226, %zmm0, %zmm4, %zmm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
@@ -4173,7 +4173,7 @@ define void @load_i8_stride5_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512BW-NEXT: vmovdqa 128(%rdi), %ymm2
; AVX512BW-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,1,6,11,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,21,26,31,u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [21474836480,21474836480,21474836480,21474836480]
+; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [0,5,0,5,0,5,0,5]
; AVX512BW-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k5}
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
index 40686eae642d8..6a959b11bf142 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
@@ -501,7 +501,7 @@ define void @load_i8_stride7_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX1-ONLY-NEXT: vpalignr {{.*#+}} xmm5 = xmm2[3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2]
; AVX1-ONLY-NEXT: vmovdqa {{.*#+}} xmm6 = <u,u,u,u,u,0,7,14,u,u,u,u,u,u,u,u>
; AVX1-ONLY-NEXT: vpshufb %xmm6, %xmm5, %xmm5
-; AVX1-ONLY-NEXT: vmovddup {{.*#+}} xmm7 = [1099511627775,1099511627775]
+; AVX1-ONLY-NEXT: vmovddup {{.*#+}} xmm7 = [255,255,255,255,255,0,0,0,255,255,255,255,255,0,0,0]
; AVX1-ONLY-NEXT: # xmm7 = mem[0,0]
; AVX1-ONLY-NEXT: vpblendvb %xmm7, %xmm4, %xmm5, %xmm4
; AVX1-ONLY-NEXT: vpshufb {{.*#+}} xmm5 = zero,zero,zero,xmm1[6,13,u,u,u,u,u,u,u,u,u,u,u]
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll
index d3ede15babd75..083e6e7f4b1de 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll
@@ -271,10 +271,10 @@ define void @store_i32_stride6_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: # ymm10 = mem[0,1,0,1]
; AVX2-ONLY-NEXT: vpermps %ymm6, %ymm10, %ymm10
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm9 = ymm10[0,1],ymm9[2,3],ymm10[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [0,4,0,4,0,4,0,4]
; AVX2-ONLY-NEXT: vpermps %ymm8, %ymm10, %ymm10
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm10[4,5],ymm9[6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [25769803778,25769803778,25769803778,25769803778]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [2,6,2,6,2,6,2,6]
; AVX2-ONLY-NEXT: vpermps %ymm6, %ymm10, %ymm6
; AVX2-ONLY-NEXT: vbroadcastf128 {{.*#+}} ymm10 = [1,5,2,6,1,5,2,6]
; AVX2-ONLY-NEXT: # ymm10 = mem[0,1,0,1]
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
index 55abaa3caedf4..6bbba6fc39143 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll
@@ -445,7 +445,7 @@ define void @store_i32_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-SLOW-NEXT: # ymm9 = mem[0,1,0,1]
; AVX2-SLOW-NEXT: vpermps %ymm4, %ymm9, %ymm4
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm6[2,3],ymm4[4,5,6,7]
-; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm6 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm6 = [0,4,0,4,0,4,0,4]
; AVX2-SLOW-NEXT: vpermps %ymm7, %ymm6, %ymm6
; AVX2-SLOW-NEXT: vbroadcastss (%r10), %ymm7
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm7[6,7]
@@ -499,7 +499,7 @@ define void @store_i32_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-NEXT: # ymm5 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm2, %ymm5, %ymm2
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5,6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm3 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm3 = [0,4,0,4,0,4,0,4]
; AVX2-FAST-NEXT: vpermps %ymm6, %ymm3, %ymm3
; AVX2-FAST-NEXT: vbroadcastss (%r10), %ymm5
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm5[6,7]
@@ -554,7 +554,7 @@ define void @store_i32_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FAST-PERLANE-NEXT: # ymm9 = mem[0,1,0,1]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm4, %ymm9, %ymm4
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm6[2,3],ymm4[4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm6 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm6 = [0,4,0,4,0,4,0,4]
; AVX2-FAST-PERLANE-NEXT: vpermps %ymm7, %ymm6, %ymm6
; AVX2-FAST-PERLANE-NEXT: vbroadcastss (%r10), %ymm7
; AVX2-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm7[6,7]
@@ -2142,7 +2142,7 @@ define void @store_i32_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX2-FAST-NEXT: vmovaps 32(%r8), %ymm10
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3,4,5],ymm10[6,7]
; AVX2-FAST-NEXT: vmovaps 32(%r9), %ymm4
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm15 = [25769803781,25769803781,25769803781,25769803781]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm15 = [5,6,5,6,5,6,5,6]
; AVX2-FAST-NEXT: vpermps %ymm4, %ymm15, %ymm15
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm1 = ymm15[0],ymm1[1,2,3,4,5,6],ymm15[7]
; AVX2-FAST-NEXT: vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[2,3]
@@ -4685,7 +4685,7 @@ define void @store_i32_stride7_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX2-FAST-NEXT: vunpckhps {{.*#+}} ymm0 = ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[6],ymm1[6],ymm4[7],ymm1[7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm15 = ymm15[0,1,2,3],ymm0[4,5,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm15 = ymm15[0,1,2,3,4,5],mem[6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm11 = [25769803781,25769803781,25769803781,25769803781]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm11 = [5,6,5,6,5,6,5,6]
; AVX2-FAST-NEXT: vpermps 96(%r9), %ymm11, %ymm11
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm11 = ymm11[0],ymm15[1,2,3,4,5,6],ymm11[7]
; AVX2-FAST-NEXT: vmovaps 96(%rax), %ymm15
@@ -9802,7 +9802,7 @@ define void @store_i32_stride7_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve
; AVX2-FAST-NEXT: vunpckhps {{.*#+}} ymm15 = ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[6],ymm0[6],ymm3[7],ymm0[7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm7 = ymm7[0,1,2,3],ymm15[4,5,6,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm6 = ymm7[0,1,2,3,4,5],ymm6[6,7]
-; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm7 = [25769803781,25769803781,25769803781,25769803781]
+; AVX2-FAST-NEXT: vbroadcastsd {{.*#+}} ymm7 = [5,6,5,6,5,6,5,6]
; AVX2-FAST-NEXT: vpermps 224(%r9), %ymm7, %ymm7
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm6 = ymm7[0],ymm6[1,2,3,4,5,6],ymm7[7]
; AVX2-FAST-NEXT: vmovaps 224(%rax), %ymm7
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
index 0366e426342e0..c20180523661e 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll
@@ -259,7 +259,7 @@ define void @store_i32_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm8
; AVX2-ONLY-NEXT: vinsertf128 $1, (%r9), %ymm5, %ymm5
; AVX2-ONLY-NEXT: vinsertf128 $1, (%r10), %ymm6, %ymm7
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm6 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm6 = [0,4,0,4,0,4,0,4]
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm6, %ymm9
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm6, %ymm6
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm9[6,7]
@@ -268,7 +268,7 @@ define void @store_i32_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm10 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm9 = xmm10[0,1],xmm9[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm6 = ymm9[0,1,2,3],ymm6[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm9 = [21474836481,21474836481,21474836481,21474836481]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm9 = [1,5,1,5,1,5,1,5]
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm9, %ymm10
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm9, %ymm9
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm9 = ymm9[0,1,2,3,4,5],ymm10[6,7]
@@ -277,7 +277,7 @@ define void @store_i32_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vunpcklps {{.*#+}} xmm11 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm10 = xmm10[0,1],xmm11[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm9 = ymm10[0,1,2,3],ymm9[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [25769803778,25769803778,25769803778,25769803778]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm10 = [2,6,2,6,2,6,2,6]
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm10, %ymm11
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm10, %ymm10
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm10 = ymm10[0,1,2,3,4,5],ymm11[6,7]
@@ -286,7 +286,7 @@ define void @store_i32_stride8_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-ONLY-NEXT: vunpckhps {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0,1],xmm8[2,3]
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm10[4,5,6,7]
-; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm4 = [30064771075,30064771075,30064771075,30064771075]
+; AVX2-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm4 = [3,7,3,7,3,7,3,7]
; AVX2-ONLY-NEXT: vpermps %ymm7, %ymm4, %ymm7
; AVX2-ONLY-NEXT: vpermps %ymm5, %ymm4, %ymm4
; AVX2-ONLY-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm7[6,7]
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
index 529f6d2ae913b..d71a6f8eeb5ae 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll
@@ -1599,7 +1599,7 @@ define void @store_i8_stride8_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,2,2,3,4,5,6,7]
; AVX1-ONLY-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,0,2,1,4,4,6,5]
-; AVX1-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [281474976710655,281474976710655,281474976710655,281474976710655]
+; AVX1-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm0 = [65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0]
; AVX1-ONLY-NEXT: vandnps %ymm2, %ymm0, %ymm2
; AVX1-ONLY-NEXT: vmovdqa (%r9), %xmm7
; AVX1-ONLY-NEXT: vmovdqa (%r8), %xmm8
@@ -1626,7 +1626,7 @@ define void @store_i8_stride8_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vpshuflw {{.*#+}} xmm14 = xmm15[0,2,2,3,4,5,6,7]
; AVX1-ONLY-NEXT: vpmovzxdq {{.*#+}} xmm14 = xmm14[0],zero,xmm14[1],zero
; AVX1-ONLY-NEXT: vinsertf128 $1, %xmm14, %ymm2, %ymm14
-; AVX1-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm2 = [18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855]
+; AVX1-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535]
; AVX1-ONLY-NEXT: vandps %ymm2, %ymm13, %ymm13
; AVX1-ONLY-NEXT: vandnps %ymm14, %ymm2, %ymm14
; AVX1-ONLY-NEXT: vorps %ymm14, %ymm13, %ymm13
@@ -4158,7 +4158,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vpshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,6,6,7]
; AVX1-ONLY-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
-; AVX1-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm5 = [281474976710655,281474976710655,281474976710655,281474976710655]
+; AVX1-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm5 = [65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0]
; AVX1-ONLY-NEXT: vandnps %ymm0, %ymm5, %ymm0
; AVX1-ONLY-NEXT: vmovdqa (%r9), %xmm3
; AVX1-ONLY-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -4190,7 +4190,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX1-ONLY-NEXT: vpmovzxwq {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero
; AVX1-ONLY-NEXT: vinsertf128 $1, %xmm8, %ymm7, %ymm7
; AVX1-ONLY-NEXT: vshufps {{.*#+}} ymm8 = ymm4[2,1,3,3,6,5,7,7]
-; AVX1-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm4 = [18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855]
+; AVX1-ONLY-NEXT: vbroadcastsd {{.*#+}} ymm4 = [65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535]
; AVX1-ONLY-NEXT: vandnps %ymm8, %ymm4, %ymm8
; AVX1-ONLY-NEXT: vandps %ymm4, %ymm7, %ymm7
; AVX1-ONLY-NEXT: vorps %ymm7, %ymm8, %ymm7
@@ -5972,7 +5972,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-SLOW-NEXT: vpshufd $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: # ymm5 = mem[2,1,3,3,6,5,7,7]
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm6
-; AVX512F-SLOW-NEXT: vpbroadcastq {{.*#+}} zmm5 = [18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855]
+; AVX512F-SLOW-NEXT: vpbroadcastq {{.*#+}} zmm5 = [65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535]
; AVX512F-SLOW-NEXT: vpternlogq $184, %zmm0, %zmm5, %zmm6
; AVX512F-SLOW-NEXT: vpshufd $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: # ymm0 = mem[0,0,2,1,4,4,6,5]
@@ -5984,7 +5984,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-SLOW-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload
; AVX512F-SLOW-NEXT: # ymm7 = mem[0,2,2,3,4,6,6,7]
; AVX512F-SLOW-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm7
-; AVX512F-SLOW-NEXT: vpbroadcastq {{.*#+}} zmm0 = [281474976710655,281474976710655,281474976710655,281474976710655,281474976710655,281474976710655,281474976710655,281474976710655]
+; AVX512F-SLOW-NEXT: vpbroadcastq {{.*#+}} zmm0 = [65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0]
; AVX512F-SLOW-NEXT: vpandnq %zmm4, %zmm0, %zmm4
; AVX512F-SLOW-NEXT: vpandq %zmm0, %zmm7, %zmm7
; AVX512F-SLOW-NEXT: movw $-21846, %ax # imm = 0xAAAA
@@ -6415,9 +6415,9 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512F-FAST-NEXT: vmovdqa {{.*#+}} ymm9 = <8,9,u,u,u,u,u,u,10,11,u,u,u,u,u,u,12,13,u,u,u,u,u,u,14,15,u,u,u,u,u,u>
; AVX512F-FAST-NEXT: vpshufb %ymm9, %ymm0, %ymm0
; AVX512F-FAST-NEXT: vinserti64x4 $1, %ymm0, %zmm6, %zmm8
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} zmm6 = [18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855,18446744069414649855]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} zmm6 = [65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535]
; AVX512F-FAST-NEXT: vpternlogq $226, {{[-0-9]+}}(%r{{[sb]}}p), %zmm6, %zmm8 # 64-byte Folded Reload
-; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} zmm0 = [281474976710655,281474976710655,281474976710655,281474976710655,281474976710655,281474976710655,281474976710655,281474976710655]
+; AVX512F-FAST-NEXT: vpbroadcastq {{.*#+}} zmm0 = [65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0]
; AVX512F-FAST-NEXT: vpandnq %zmm29, %zmm0, %zmm29
; AVX512F-FAST-NEXT: vpandq %zmm0, %zmm17, %zmm17
; AVX512F-FAST-NEXT: movw $-21846, %ax # imm = 0xAAAA
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll b/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
index 1f903143ec9d1..c57f4d9cb59b2 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add-mask.ll
@@ -612,7 +612,7 @@ define i32 @test_v16i32_v16i8(<16 x i32> %a0) {
;
; AVX2-LABEL: test_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
@@ -745,7 +745,7 @@ define i32 @test_v32i32_v32i8(<32 x i32> %a0) {
;
; AVX2-LABEL: test_v32i32_v32i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 580fdcddbc3c7..e4cc9731c6105 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -650,7 +650,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
;
; AVX2-LABEL: trunc_v16i32_v16i1:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -736,7 +736,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
;
; AVX2-LABEL: trunc_v32i16_v32i1:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 8374161d6de0d..17f346138a92d 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -313,7 +313,7 @@ define <16 x i32> @splatvar_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512F-LABEL: splatvar_rotate_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm5
@@ -330,7 +330,7 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
;
; AVX512VL-LABEL: splatvar_rotate_v32i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,15]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [15,0,0,0,15,0,0,0]
; AVX512VL-NEXT: vpandn %xmm2, %xmm1, %xmm3
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4
; AVX512VL-NEXT: vpsrlw $1, %ymm4, %ymm5
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index ed7d22356de8f..3eef750029791 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -2524,7 +2524,7 @@ define <16 x i8> @load_sext_16i1_to_16i8(ptr%ptr) nounwind readnone {
; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -2535,7 +2535,7 @@ define <16 x i8> @load_sext_16i1_to_16i8(ptr%ptr) nounwind readnone {
; AVX2-NEXT: movzwl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
@@ -2684,7 +2684,7 @@ define <32 x i8> @load_sext_32i1_to_32i8(ptr%ptr) nounwind readnone {
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
@@ -2696,7 +2696,7 @@ define <32 x i8> @load_sext_32i1_to_32i8(ptr%ptr) nounwind readnone {
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index f29d60e6edc59..99c735dec13c0 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -89,7 +89,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; X86-AVX1-LABEL: var_shift_v4i64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: # xmm3 = mem[0,0]
; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
@@ -2169,7 +2169,7 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; X86-AVX1-LABEL: PR52719:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X86-AVX1-NEXT: # xmm2 = mem[0,0]
; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 66ca022f8de20..4e9d17801f5ce 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -806,7 +806,7 @@ define <16 x i8> @shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(
;
; AVX512VLVBMI-LABEL: shuffle_v16i8_02_20_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vpbroadcastw {{.*#+}} xmm2 = [5122,5122,5122,5122,5122,5122,5122,5122]
+; AVX512VLVBMI-NEXT: vpbroadcastw {{.*#+}} xmm2 = [2,20,2,20,2,20,2,20,2,20,2,20,2,20,2,20]
; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
; AVX512VLVBMI-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 5b216d0cdd9c5..ea4549aa67b98 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -2631,7 +2631,7 @@ define <8 x i16> @shuffle_v8i16_048C048C(<8 x i16> %a, <8 x i16> %b) {
;
; AVX512VL-LABEL: shuffle_v8i16_048C048C:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [3377734080528384,3377734080528384]
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12]
; AVX512VL-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
; AVX512VL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 74cd5dd8f86f1..e858c7cdbfa29 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -1294,7 +1294,7 @@ define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_0
define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
@@ -1307,7 +1307,7 @@ define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_3
;
; XOPAVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
+; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
; XOPAVX1-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
@@ -1322,7 +1322,7 @@ define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_3
define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
@@ -1335,7 +1335,7 @@ define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_1
;
; XOPAVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
+; XOPAVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0,65535,0]
; XOPAVX1-NEXT: vpcmov %ymm2, %ymm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 465907e487277..a3dd5bf3a1d42 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -2285,7 +2285,7 @@ define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_
define <32 x i8> @load_fold_pblendvb(ptr %px, <32 x i8> %y) {
; AVX1-LABEL: load_fold_pblendvb:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [18374686483949879295,18374686483949879295,18374686483949879295,18374686483949879295]
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX1-NEXT: vandnps (%rdi), %ymm1, %ymm2
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
@@ -2306,7 +2306,7 @@ define <32 x i8> @load_fold_pblendvb(ptr %px, <32 x i8> %y) {
;
; XOPAVX1-LABEL: load_fold_pblendvb:
; XOPAVX1: # %bb.0:
-; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [18374686483949879295,18374686483949879295,18374686483949879295,18374686483949879295]
+; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; XOPAVX1-NEXT: vpcmov %ymm1, (%rdi), %ymm0, %ymm0
; XOPAVX1-NEXT: retq
;
@@ -2323,7 +2323,7 @@ define <32 x i8> @load_fold_pblendvb(ptr %px, <32 x i8> %y) {
define <32 x i8> @load_fold_pblendvb_commute(ptr %px, <32 x i8> %y) {
; AVX1-LABEL: load_fold_pblendvb_commute:
; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [18374686483949879295,18374686483949879295,18374686483949879295,18374686483949879295]
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX1-NEXT: vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vandps (%rdi), %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
@@ -2347,7 +2347,7 @@ define <32 x i8> @load_fold_pblendvb_commute(ptr %px, <32 x i8> %y) {
; XOPAVX1-LABEL: load_fold_pblendvb_commute:
; XOPAVX1: # %bb.0:
; XOPAVX1-NEXT: vmovdqa (%rdi), %ymm1
-; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [18374686483949879295,18374686483949879295,18374686483949879295,18374686483949879295]
+; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; XOPAVX1-NEXT: vpcmov %ymm2, %ymm0, %ymm1, %ymm0
; XOPAVX1-NEXT: retq
;
@@ -5073,20 +5073,20 @@ define <32 x i8> @PR55066(<32 x i8> %a0) {
; AVX2-LABEL: PR55066:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VLBW-LABEL: PR55066:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869184,17179869184,17179869184,17179869184]
+; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
; AVX512VLBW-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI-LABEL: PR55066:
; AVX512VLVBMI: # %bb.0:
-; AVX512VLVBMI-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2024390091656922112,2024390091656922112,2024390091656922112,2024390091656922112]
+; AVX512VLVBMI-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28]
; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
; AVX512VLVBMI-NEXT: retq
;
@@ -5100,7 +5100,7 @@ define <32 x i8> @PR55066(<32 x i8> %a0) {
; XOPAVX2-LABEL: PR55066:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
-; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [17179869184,17179869184,17179869184,17179869184]
+; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
; XOPAVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; XOPAVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a0, <32 x i8> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index ad25c8096a109..8a302e026b6b4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -336,7 +336,7 @@ define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
;
; AVX512VL-FAST-ALL-LABEL: shuffle_v8f32_08080808:
; AVX512VL-FAST-ALL: # %bb.0:
-; AVX512VL-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [34359738368,34359738368,34359738368,34359738368]
+; AVX512VL-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,8,0,8,0,8,0,8]
; AVX512VL-FAST-ALL-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT: retq
;
@@ -1970,7 +1970,7 @@ define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
;
; AVX512VL-FAST-ALL-LABEL: shuffle_v8i32_08080808:
; AVX512VL-FAST-ALL: # %bb.0:
-; AVX512VL-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [34359738368,34359738368,34359738368,34359738368]
+; AVX512VL-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,8,0,8,0,8,0,8]
; AVX512VL-FAST-ALL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
index 65be0085cac25..d44e584599246 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -503,7 +503,7 @@ define <64 x i8> @shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1]
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14]
; AVX512F-NEXT: vpshufb %ymm3, %ymm2, %ymm2
@@ -526,7 +526,7 @@ define <64 x i8> @shuffle_v64i8_63_64_61_66_59_68_57_70_55_72_53_74_51_76_49_78_
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,3,0,1]
-; AVX512DQ-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512DQ-NEXT: vpbroadcastw {{.*#+}} ymm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14,15,0,13,2,11,4,9,6,7,8,5,10,3,12,1,14]
; AVX512DQ-NEXT: vpshufb %ymm3, %ymm2, %ymm2
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 0dca69aaaf0b4..66d2ccff6d77f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -336,7 +336,7 @@ define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){
;
; AVX512F-LABEL: test_mm512_mask_blend_epi8:
; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm2
; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: ret{{[l|q]}}
diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll
index c5fcacc5f2a42..39dc8662c7a4e 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-math.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -253,7 +253,7 @@ define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX2-NEXT: vpaddq %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpaddq %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -325,7 +325,7 @@ define <16 x i8> @trunc_add_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -650,7 +650,7 @@ define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX2-LABEL: trunc_add_const_v16i64_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -710,7 +710,7 @@ define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX2-LABEL: trunc_add_const_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -1031,7 +1031,7 @@ define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX2-NEXT: vpsubq %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpsubq %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpsubq %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -1103,7 +1103,7 @@ define <16 x i8> @trunc_sub_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX2: # %bb.0:
; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -1398,7 +1398,7 @@ define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX2-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -1458,7 +1458,7 @@ define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX2-LABEL: trunc_sub_const_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -1880,7 +1880,7 @@ define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX2-NEXT: vpmuludq %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpmuludq %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -1996,7 +1996,7 @@ define <16 x i8> @trunc_mul_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpmulld %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -2345,7 +2345,7 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -2455,7 +2455,7 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -2755,7 +2755,7 @@ define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX2-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -2821,7 +2821,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX2: # %bb.0:
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -3095,7 +3095,7 @@ define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX2-LABEL: trunc_and_const_v16i64_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -3155,7 +3155,7 @@ define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX2-LABEL: trunc_and_const_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -3452,7 +3452,7 @@ define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX2-NEXT: vpxor %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpxor %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -3518,7 +3518,7 @@ define <16 x i8> @trunc_xor_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -3792,7 +3792,7 @@ define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX2-LABEL: trunc_xor_const_v16i64_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -3852,7 +3852,7 @@ define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX2-LABEL: trunc_xor_const_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -4149,7 +4149,7 @@ define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind
; AVX2-NEXT: vpor %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpor %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpor %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -4215,7 +4215,7 @@ define <16 x i8> @trunc_or_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind
; AVX2: # %bb.0:
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -4489,7 +4489,7 @@ define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
;
; AVX2-LABEL: trunc_or_const_v16i64_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX2-NEXT: vpackusdw %ymm3, %ymm2, %ymm2
@@ -4549,7 +4549,7 @@ define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
;
; AVX2-LABEL: trunc_or_const_v16i32_v16i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 4b73e5a590a39..87cc6a86d7dd0 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -294,7 +294,7 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
;
; AVX2-LABEL: trunc8i64_8i8:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,255,255,255]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -911,7 +911,7 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
;
; AVX2-LABEL: trunc16i32_16i8:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
@@ -1309,7 +1309,7 @@ define void @trunc32i16_32i8(<32 x i16> %a) {
;
; AVX2-LABEL: trunc32i16_32i8:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
@@ -1745,7 +1745,7 @@ define <32 x i8> @trunc2x16i16_32i8(<16 x i16> %a, <16 x i16> %b) {
;
; AVX2-LABEL: trunc2x16i16_32i8:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index a97a82c041d6e..be720f59d978e 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -659,7 +659,7 @@ define <32 x i1> @interleaved_load_vf32_i8_stride4(ptr %ptr) nounwind {
; AVX2-NEXT: vpblendd {{.*#+}} xmm7 = xmm7[0,1],xmm6[2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm8 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %ymm8, %ymm1, %ymm9
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm6 = [17179869184,17179869184,17179869184,17179869184]
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm6 = [0,4,0,4,0,4,0,4]
; AVX2-NEXT: vpermd %ymm9, %ymm6, %ymm9
; AVX2-NEXT: vpshufb %ymm8, %ymm0, %ymm8
; AVX2-NEXT: vpermd %ymm8, %ymm6, %ymm8
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index d87e0084047f6..85bec77fe5eb2 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -2738,7 +2738,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastd %xmm0, %ymm3
@@ -2757,7 +2757,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastd %xmm0, %ymm3
@@ -3033,7 +3033,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.v
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastq %xmm0, %ymm3
@@ -3052,7 +3052,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.v
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
-; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
; AVX512DQ-NEXT: vpbroadcastq %xmm0, %ymm3
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 34c8845735f4c..2e00b5c9c91a5 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1022,7 +1022,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512F-LABEL: vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512F-NEXT: vpternlogd $202, (%rdi){1to8}, %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1032,7 +1032,7 @@ define void @vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8(ptr %in.e
; AVX512DQ-LABEL: vec256_i8_widen_to_i32_factor4_broadcast_to_v8i32_factor8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512DQ-NEXT: vpternlogd $202, (%rdi){1to8}, %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1120,7 +1120,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512F-LABEL: vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpternlogq $202, (%rdi){1to4}, %ymm0, %ymm1
; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
@@ -1130,7 +1130,7 @@ define void @vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4(ptr %in.e
; AVX512DQ-LABEL: vec256_i8_widen_to_i64_factor8_broadcast_to_v4i64_factor4:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512DQ-NEXT: vpternlogq $202, (%rdi){1to4}, %ymm0, %ymm1
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm0
; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
@@ -2174,7 +2174,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpbroadcastd %xmm0, %ymm3
; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2
@@ -2191,7 +2191,7 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
-; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040,4294967040]
+; AVX512DQ-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpbroadcastd %xmm0, %ymm3
; AVX512DQ-NEXT: vpandn %ymm3, %ymm2, %ymm2
@@ -2426,7 +2426,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpbroadcastq %xmm0, %ymm3
; AVX512F-NEXT: vpandn %ymm3, %ymm2, %ymm2
@@ -2443,7 +2443,7 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
-; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
+; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT: vpbroadcastq %xmm0, %ymm3
; AVX512DQ-NEXT: vpandn %ymm3, %ymm2, %ymm2